Diffstat (limited to 'drivers')
417 files changed, 20919 insertions, 8373 deletions
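Most of the sk_buff hunks below follow a single pattern: direct arithmetic on skb->end, skb->tail, and skb->mac.raw is replaced by accessor helpers (skb_end_pointer(), skb_tail_pointer(), skb_reset_tail_pointer(), skb_mac_header(), skb_copy_from_linear_data()), so the same driver source keeps working when those fields become offsets rather than pointers on 64-bit builds. A minimal sketch of the before/after pattern, assuming the helpers from this kernel's skbuff.h; the example_* functions are stand-ins, not code from the patch:

#include <linux/skbuff.h>

/* old: skb->end - skb->data */
static unsigned int example_buf_size(const struct sk_buff *skb)
{
	return skb_end_pointer(skb) - skb->data;
}

/* old: skb->data = skb->tail = skb->head; */
static void example_recycle(struct sk_buff *skb)
{
	skb->data = skb->head;
	skb_reset_tail_pointer(skb);
	skb->len = 0;
}

The helper form compiles to the same code on 32-bit hosts, which is why these conversions can be applied mechanically across every driver in the diff.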
diff --git a/drivers/Makefile b/drivers/Makefile index 3a718f51350..920c975bb6d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,7 +72,6 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_NEW_LEDS) += leds/ obj-$(CONFIG_INFINIBAND) += infiniband/ -obj-$(CONFIG_IPATH_CORE) += infiniband/ obj-$(CONFIG_SGI_SN) += sn/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 3c372e08f77..59651abfa4f 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -821,7 +821,7 @@ static inline void fill_rx_pool (amb_dev * dev, unsigned char pool, } // cast needed as there is no %? for pointer differences PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li", - skb, skb->head, (long) (skb->end - skb->head)); + skb, skb->head, (long) (skb_end_pointer(skb) - skb->head)); rx.handle = virt_to_bus (skb); rx.host_address = cpu_to_be32 (virt_to_bus (skb->data)); if (rx_give (dev, &rx, pool)) diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index fc518d85543..02ad83d6b56 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -221,7 +221,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb) hdr->vpi = htons(vcc->vpi); hdr->vci = htons(vcc->vci); hdr->length = htonl(skb->len); - memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); + skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len); if (vcc->pop) vcc->pop(vcc,skb); else dev_kfree_skb(skb); out_vcc->push(out_vcc,new_skb); @@ -310,7 +310,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) goto done; } __net_timestamp(new_skb); - memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); + skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len); out_vcc->push(out_vcc,new_skb); atomic_inc(&vcc->stats->tx); atomic_inc(&out_vcc->stats->rx); @@ -352,7 +352,7 @@ static struct atm_dev atmtcp_control_dev = { .ops = &atmtcp_c_dev_ops, .type = "atmtcp", .number = 999, - .lock = SPIN_LOCK_UNLOCKED + .lock = __SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock) }; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 8fccf018f16..0d3a38b1cb0 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -536,7 +536,7 @@ static int rx_aal0(struct atm_vcc *vcc) return 0; } skb_put(skb,length); - skb_set_timestamp(skb, &eni_vcc->timestamp); + skb->tstamp = eni_vcc->timestamp; DPRINTK("got len %ld\n",length); if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; eni_vcc->rxing++; @@ -701,7 +701,7 @@ static void get_service(struct atm_dev *dev) DPRINTK("Grr, servicing VCC %ld twice\n",vci); continue; } - do_gettimeofday(&ENI_VCC(vcc)->timestamp); + ENI_VCC(vcc)->timestamp = ktime_get_real(); ENI_VCC(vcc)->next = NULL; if (vcc->qos.rxtp.traffic_class == ATM_CBR) { if (eni_dev->fast) diff --git a/drivers/atm/eni.h b/drivers/atm/eni.h index 385090c2a58..d04fefb0841 100644 --- a/drivers/atm/eni.h +++ b/drivers/atm/eni.h @@ -59,7 +59,7 @@ struct eni_vcc { int rxing; /* number of pending PDUs */ int servicing; /* number of waiting VCs (0 or 1) */ int txing; /* number of pending TX bytes */ - struct timeval timestamp; /* for RX timing */ + ktime_t timestamp; /* for RX timing */ struct atm_vcc *next; /* next pending RX */ struct sk_buff *last; /* last PDU being DMAed (used to carry discard information) */ diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index a7c0ed3107e..405ee5e0922 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1,6 +1,4 @@ /* - $Id: 
fore200e.c,v 1.5 2000/04/14 10:10:34 davem Exp $ - A FORE Systems 200E-series driver for ATM on Linux. Christophe Lizzi (lizzi@cnam.fr), October 1999-March 2003. @@ -1502,9 +1500,9 @@ fore200e_open(struct atm_vcc *vcc) /* pseudo-CBR bandwidth requested? */ if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) { - down(&fore200e->rate_sf); + mutex_lock(&fore200e->rate_mtx); if (fore200e->available_cell_rate < vcc->qos.txtp.max_pcr) { - up(&fore200e->rate_sf); + mutex_unlock(&fore200e->rate_mtx); kfree(fore200e_vcc); vc_map->vcc = NULL; @@ -1513,7 +1511,7 @@ fore200e_open(struct atm_vcc *vcc) /* reserve bandwidth */ fore200e->available_cell_rate -= vcc->qos.txtp.max_pcr; - up(&fore200e->rate_sf); + mutex_unlock(&fore200e->rate_mtx); } vcc->itf = vcc->dev->number; @@ -1599,9 +1597,9 @@ fore200e_close(struct atm_vcc* vcc) /* release reserved bandwidth, if any */ if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) { - down(&fore200e->rate_sf); + mutex_lock(&fore200e->rate_mtx); fore200e->available_cell_rate += vcc->qos.txtp.max_pcr; - up(&fore200e->rate_sf); + mutex_unlock(&fore200e->rate_mtx); clear_bit(ATM_VF_HASQOS, &vcc->flags); } @@ -2064,16 +2062,16 @@ fore200e_change_qos(struct atm_vcc* vcc,struct atm_qos* qos, int flags) if ((qos->txtp.traffic_class == ATM_CBR) && (qos->txtp.max_pcr > 0)) { - down(&fore200e->rate_sf); + mutex_lock(&fore200e->rate_mtx); if (fore200e->available_cell_rate + vcc->qos.txtp.max_pcr < qos->txtp.max_pcr) { - up(&fore200e->rate_sf); + mutex_unlock(&fore200e->rate_mtx); return -EAGAIN; } fore200e->available_cell_rate += vcc->qos.txtp.max_pcr; fore200e->available_cell_rate -= qos->txtp.max_pcr; - up(&fore200e->rate_sf); + mutex_unlock(&fore200e->rate_mtx); memcpy(&vcc->qos, qos, sizeof(struct atm_qos)); @@ -2459,7 +2457,7 @@ fore200e_initialize(struct fore200e* fore200e) DPRINTK(2, "device %s being initialized\n", fore200e->name); - init_MUTEX(&fore200e->rate_sf); + mutex_init(&fore200e->rate_mtx); spin_lock_init(&fore200e->q_lock); cpq = fore200e->cp_queues = fore200e->virt_base + FORE200E_CP_QUEUES_OFFSET; diff --git a/drivers/atm/fore200e.h b/drivers/atm/fore200e.h index f9abfdac33e..b85a54613de 100644 --- a/drivers/atm/fore200e.h +++ b/drivers/atm/fore200e.h @@ -869,7 +869,7 @@ typedef struct fore200e { struct stats* stats; /* last snapshot of the stats */ - struct semaphore rate_sf; /* protects rate reservation ops */ + struct mutex rate_mtx; /* protects rate reservation ops */ spinlock_t q_lock; /* protects queue ops */ #ifdef FORE200E_USE_TASKLET struct tasklet_struct tx_tasklet; /* performs tx interrupt work */ diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 8510026b690..d33aba6864c 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1901,13 +1901,13 @@ he_service_rbrq(struct he_dev *he_dev, int group) case ATM_AAL0: /* 2.10.1.5 raw cell receive */ skb->len = ATM_AAL0_SDU; - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); break; case ATM_AAL5: /* 2.10.1.2 aal5 receive */ skb->len = AAL5_LEN(skb->data, he_vcc->pdu_len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); #ifdef USE_CHECKSUM_HW if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) { skb->ip_summed = CHECKSUM_COMPLETE; diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index b4b80140c39..057efbc55d3 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1065,7 +1065,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) vcc = vc->rx_vcc; 
pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, PCI_DMA_FROMDEVICE); + skb_end_pointer(skb) - skb->data, + PCI_DMA_FROMDEVICE); if ((vcc->qos.aal == ATM_AAL0) || (vcc->qos.aal == ATM_AAL34)) { @@ -1194,7 +1195,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) } pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, PCI_DMA_FROMDEVICE); + skb_end_pointer(skb) - skb->data, + PCI_DMA_FROMDEVICE); sb_pool_remove(card, skb); skb_trim(skb, len); @@ -1267,7 +1269,7 @@ idt77252_rx_raw(struct idt77252_dev *card) tail = readl(SAR_REG_RAWCT); pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue), - queue->end - queue->head - 16, + skb_end_pointer(queue) - queue->head - 16, PCI_DMA_FROMDEVICE); while (head != tail) { @@ -1363,7 +1365,8 @@ drop: queue = card->raw_cell_head; pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue), - queue->end - queue->data, + (skb_end_pointer(queue) - + queue->data), PCI_DMA_FROMDEVICE); } else { card->raw_cell_head = NULL; @@ -1816,7 +1819,8 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue) u32 handle; u32 addr; - skb->data = skb->tail = skb->head; + skb->data = skb->head; + skb_reset_tail_pointer(skb); skb->len = 0; skb_reserve(skb, 16); @@ -1835,7 +1839,6 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue) skb_put(skb, SAR_FB_SIZE_3); break; default: - dev_kfree_skb(skb); return -1; } @@ -1874,7 +1877,7 @@ add_rx_skb(struct idt77252_dev *card, int queue, } paddr = pci_map_single(card->pcidev, skb->data, - skb->end - skb->data, + skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE); IDT77252_PRV_PADDR(skb) = paddr; @@ -1888,7 +1891,7 @@ add_rx_skb(struct idt77252_dev *card, int queue, outunmap: pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, PCI_DMA_FROMDEVICE); + skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE); handle = IDT77252_PRV_POOL(skb); card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL; @@ -1905,12 +1908,14 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb) int err; pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, PCI_DMA_FROMDEVICE); + skb_end_pointer(skb) - skb->data, + PCI_DMA_FROMDEVICE); err = push_rx_skb(card, skb, POOL_QUEUE(handle)); if (err) { pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, PCI_DMA_FROMDEVICE); + skb_end_pointer(skb) - skb->data, + PCI_DMA_FROMDEVICE); sb_pool_remove(card, skb); dev_kfree_skb(skb); } @@ -3122,7 +3127,8 @@ deinit_card(struct idt77252_dev *card) if (skb) { pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb), - skb->end - skb->data, + (skb_end_pointer(skb) - + skb->data), PCI_DMA_FROMDEVICE); card->sbpool[i].skb[j] = NULL; dev_kfree_skb(skb); diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index aab9b3733d5..14ced85b3f5 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2208,7 +2208,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (i == 1 && ns_rsqe_eopdu(rsqe)) *((u32 *) sb->data) |= 0x00000002; skb_put(sb, NS_AAL0_HEADER); - memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); + memcpy(skb_tail_pointer(sb), cell, ATM_CELL_PAYLOAD); skb_put(sb, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; __net_timestamp(sb); @@ -2252,7 +2252,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) vc->rx_iov = iovb; NS_SKB(iovb)->iovcnt = 0; iovb->len = 0; - iovb->tail = iovb->data = iovb->head; + iovb->data = 
iovb->head; + skb_reset_tail_pointer(iovb); NS_SKB(iovb)->vcc = vcc; /* IMPORTANT: a pointer to the sk_buff containing the small or large buffer is stored as iovec base, NOT a pointer to the @@ -2265,7 +2266,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS); NS_SKB(iovb)->iovcnt = 0; iovb->len = 0; - iovb->tail = iovb->data = iovb->head; + iovb->data = iovb->head; + skb_reset_tail_pointer(iovb); NS_SKB(iovb)->vcc = vcc; } iov = &((struct iovec *) iovb->data)[NS_SKB(iovb)->iovcnt++]; @@ -2393,7 +2395,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb->destructor = ns_lb_destructor; #endif /* NS_USE_DESTRUCTORS */ skb_push(skb, NS_SMBUFSIZE); - memcpy(skb->data, sb->data, NS_SMBUFSIZE); + skb_copy_from_linear_data(sb, skb->data, NS_SMBUFSIZE); skb_put(skb, len - NS_SMBUFSIZE); ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); @@ -2477,7 +2479,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { /* Copy the small buffer to the huge buffer */ sb = (struct sk_buff *) iov->iov_base; - memcpy(hb->data, sb->data, iov->iov_len); + skb_copy_from_linear_data(sb, hb->data, iov->iov_len); skb_put(hb, iov->iov_len); remaining = len - iov->iov_len; iov++; @@ -2489,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { lb = (struct sk_buff *) iov->iov_base; tocopy = min_t(int, remaining, iov->iov_len); - memcpy(hb->tail, lb->data, tocopy); + skb_copy_from_linear_data(lb, skb_tail_pointer(hb), tocopy); skb_put(hb, tocopy); iov++; remaining -= tocopy; diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 2308e83e5f3..1d846681794 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -48,6 +48,15 @@ struct aoe_hdr { __be32 tag; }; +#ifdef __KERNEL__ +#include <linux/skbuff.h> + +static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb) +{ + return (struct aoe_hdr *)skb_mac_header(skb); +} +#endif + struct aoe_atahdr { unsigned char aflags; unsigned char errfeat; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 8d17d8df366..1a6aeac5a1c 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -27,7 +27,8 @@ new_skb(ulong len) skb = alloc_skb(len, GFP_ATOMIC); if (skb) { - skb->nh.raw = skb->mac.raw = skb->data; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); skb->protocol = __constant_htons(ETH_P_AOE); skb->priority = 0; skb->next = skb->prev = NULL; @@ -118,7 +119,7 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) /* initialize the headers & frame */ skb = f->skb; - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); @@ -207,7 +208,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) skb->dev = ifp; if (sl_tail == NULL) sl_tail = skb; - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); memset(h, 0, sizeof *h + sizeof *ch); memset(h->dst, 0xff, sizeof h->dst); @@ -300,7 +301,7 @@ rexmit(struct aoedev *d, struct frame *f) aoechr_error(buf); skb = f->skb; - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); ah = (struct aoe_atahdr *) (h+1); f->tag = n; h->tag = cpu_to_be32(n); @@ -529,7 +530,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) char ebuf[128]; u16 aoemajor; - hin = (struct aoe_hdr *) skb->mac.raw; + hin = aoe_hdr(skb); aoemajor = be16_to_cpu(get_unaligned(&hin->major)); d = aoedev_by_aoeaddr(aoemajor, hin->minor); if (d == NULL) { @@ -561,7 +562,7 @@ 
aoecmd_ata_rsp(struct sk_buff *skb) calc_rttavg(d, tsince(f->tag)); ahin = (struct aoe_atahdr *) (hin+1); - hout = (struct aoe_hdr *) f->skb->mac.raw; + hout = aoe_hdr(f->skb); ahout = (struct aoe_atahdr *) (hout+1); buf = f->buf; @@ -695,7 +696,7 @@ aoecmd_ata_id(struct aoedev *d) /* initialize the headers & frame */ skb = f->skb; - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); @@ -726,7 +727,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb) enum { MAXFRAMES = 16 }; u16 n; - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); ch = (struct aoe_cfghdr *) (h+1); /* diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index aab6d91a2c2..f9ddfda4d9c 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -123,7 +123,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, goto exit; skb_push(skb, ETH_HLEN); /* (1) */ - h = (struct aoe_hdr *) skb->mac.raw; + h = aoe_hdr(skb); n = be32_to_cpu(get_unaligned(&h->tag)); if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) goto exit; diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 4c766f36d88..b990805806a 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -527,7 +527,7 @@ static int bfusb_send_frame(struct sk_buff *skb) buf[2] = (size == BFUSB_MAX_BLOCK_SIZE) ? 0 : size; memcpy(skb_put(nskb, 3), buf, 3); - memcpy(skb_put(nskb, size), skb->data + sent, size); + skb_copy_from_linear_data_offset(skb, sent, skb_put(nskb, size), size); sent += size; count -= size; diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index acfb6a430dc..851de4d5b7d 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -461,20 +461,20 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset) switch (info->rx_state) { case RECV_WAIT_EVENT_HEADER: - eh = (struct hci_event_hdr *)(info->rx_skb->data); + eh = hci_event_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = eh->plen; break; case RECV_WAIT_ACL_HEADER: - ah = (struct hci_acl_hdr *)(info->rx_skb->data); + ah = hci_acl_hdr(info->rx_skb); dlen = __le16_to_cpu(ah->dlen); info->rx_state = RECV_WAIT_DATA; info->rx_count = dlen; break; case RECV_WAIT_SCO_HEADER: - sh = (struct hci_sco_hdr *)(info->rx_skb->data); + sh = hci_sco_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = sh->dlen; break; diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 9fca6513562..e8ebd5d3de8 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -231,7 +231,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data) cr = (struct usb_ctrlrequest *) urb->setup_packet; cr->wLength = __cpu_to_le16(skb->len); - memcpy(urb->transfer_buffer, skb->data, skb->len); + skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len); urb->transfer_buffer_length = skb->len; err = usb_submit_urb(urb, GFP_ATOMIC); @@ -250,7 +250,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data) skb = skb_dequeue(&data->tx_queue); if (skb) { - memcpy(urb->transfer_buffer, skb->data, skb->len); + skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len); urb->transfer_buffer_length = skb->len; err = usb_submit_urb(urb, GFP_ATOMIC); diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index 18b0f3992c5..39516074636 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ 
-303,20 +303,20 @@ static void bt3c_receive(bt3c_info_t *info) switch (info->rx_state) { case RECV_WAIT_EVENT_HEADER: - eh = (struct hci_event_hdr *)(info->rx_skb->data); + eh = hci_event_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = eh->plen; break; case RECV_WAIT_ACL_HEADER: - ah = (struct hci_acl_hdr *)(info->rx_skb->data); + ah = hci_acl_hdr(info->rx_skb); dlen = __le16_to_cpu(ah->dlen); info->rx_state = RECV_WAIT_DATA; info->rx_count = dlen; break; case RECV_WAIT_SCO_HEADER: - sh = (struct hci_sco_hdr *)(info->rx_skb->data); + sh = hci_sco_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = sh->dlen; break; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index c1bce75148f..d7d2ea0d86a 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -250,20 +250,20 @@ static void btuart_receive(btuart_info_t *info) switch (info->rx_state) { case RECV_WAIT_EVENT_HEADER: - eh = (struct hci_event_hdr *)(info->rx_skb->data); + eh = hci_event_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = eh->plen; break; case RECV_WAIT_ACL_HEADER: - ah = (struct hci_acl_hdr *)(info->rx_skb->data); + ah = hci_acl_hdr(info->rx_skb); dlen = __le16_to_cpu(ah->dlen); info->rx_state = RECV_WAIT_DATA; info->rx_count = dlen; break; case RECV_WAIT_SCO_HEADER: - sh = (struct hci_sco_hdr *)(info->rx_skb->data); + sh = hci_sco_hdr(info->rx_skb); info->rx_state = RECV_WAIT_DATA; info->rx_count = sh->dlen; break; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index 459aa97937a..7f9c54b9964 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -425,7 +425,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb) return -ENOMEM; skb_reserve(s, NSHL); - memcpy(skb_put(s, skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len); if (skb->len & 0x0001) *skb_put(s, 1) = 0; /* PAD */ diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 34f0afc4240..bfbae14cf93 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -188,7 +188,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count) continue; case H4_W4_EVENT_HDR: - eh = (struct hci_event_hdr *) h4->rx_skb->data; + eh = hci_event_hdr(h4->rx_skb); BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen); @@ -196,7 +196,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count) continue; case H4_W4_ACL_HDR: - ah = (struct hci_acl_hdr *) h4->rx_skb->data; + ah = hci_acl_hdr(h4->rx_skb); dlen = __le16_to_cpu(ah->dlen); BT_DBG("ACL header: dlen %d", dlen); @@ -205,7 +205,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count) continue; case H4_W4_SCO_HDR: - sh = (struct hci_sco_hdr *) h4->rx_skb->data; + sh = hci_sco_hdr(h4->rx_skb); BT_DBG("SCO header: dlen %d", sh->dlen); diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 8d025e9b5bc..157b1d09ab5 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4169,7 +4169,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) netif_stop_queue(dev); /* copy data to device buffers */ - memcpy(info->tx_buf, skb->data, skb->len); + skb_copy_from_linear_data(skb, info->tx_buf, skb->len); info->tx_get = 0; info->tx_put = info->tx_count = skb->len; diff --git a/drivers/char/random.c b/drivers/char/random.c index b9dc7aa1dfb..46c1b97748b 100644 --- a/drivers/char/random.c +++ 
b/drivers/char/random.c @@ -881,15 +881,15 @@ EXPORT_SYMBOL(get_random_bytes); */ static void init_std_data(struct entropy_store *r) { - struct timeval tv; + ktime_t now; unsigned long flags; spin_lock_irqsave(&r->lock, flags); r->entropy_count = 0; spin_unlock_irqrestore(&r->lock, flags); - do_gettimeofday(&tv); - add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4); + now = ktime_get_real(); + add_entropy_words(r, (__u32 *)&now, sizeof(now)/4); add_entropy_words(r, (__u32 *)utsname(), sizeof(*(utsname()))/4); } @@ -911,14 +911,12 @@ void rand_initialize_irq(int irq) return; /* - * If kmalloc returns null, we just won't use that entropy + * If kzalloc returns null, we just won't use that entropy * source. */ - state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - memset(state, 0, sizeof(struct timer_rand_state)); + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) irq_timer_state[irq] = state; - } } #ifdef CONFIG_BLOCK @@ -927,14 +925,12 @@ void rand_initialize_disk(struct gendisk *disk) struct timer_rand_state *state; /* - * If kmalloc returns null, we just won't use that entropy + * If kzalloc returns null, we just won't use that entropy * source. */ - state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - memset(state, 0, sizeof(struct timer_rand_state)); + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) disk->random = state; - } } #endif @@ -1469,7 +1465,6 @@ late_initcall(seqgen_init); __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, __be16 sport, __be16 dport) { - struct timeval tv; __u32 seq; __u32 hash[12]; struct keydata *keyptr = get_keyptr(); @@ -1485,8 +1480,7 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; seq += keyptr->count; - do_gettimeofday(&tv); - seq += tv.tv_usec + tv.tv_sec * 1000000; + seq += ktime_get_real().tv64; return seq; } @@ -1521,7 +1515,6 @@ __u32 secure_ip_id(__be32 daddr) __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { - struct timeval tv; __u32 seq; __u32 hash[4]; struct keydata *keyptr = get_keyptr(); @@ -1543,12 +1536,11 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, * As close as possible to RFC 793, which * suggests using a 250 kHz clock. * Further reading shows this assumes 2 Mb/s networks. - * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock is appropriate. * That's funny, Linux has one built in! Use it! * (Networks are faster now - should this be increased?) 
*/ - do_gettimeofday(&tv); - seq += tv.tv_usec + tv.tv_sec * 1000000; + seq += ktime_get_real().tv64; #if 0 printk("init_seq(%lx, %lx, %d, %d) = %d\n", saddr, daddr, sport, dport, seq); @@ -1556,8 +1548,6 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, return seq; } -EXPORT_SYMBOL(secure_tcp_sequence_number); - /* Generate secure starting point for ephemeral IPV4 transport port search */ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { @@ -1598,7 +1588,6 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { - struct timeval tv; u64 seq; __u32 hash[4]; struct keydata *keyptr = get_keyptr(); @@ -1611,8 +1600,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, seq = half_md4_transform(hash, keyptr->secret); seq |= ((u64)keyptr->count) << (32 - HASH_BITS); - do_gettimeofday(&tv); - seq += tv.tv_usec + tv.tv_sec * 1000000; + seq += ktime_get_real().tv64; seq &= (1ull << 48) - 1; #if 0 printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index a905f782033..a7b9e9bb3e8 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -212,7 +212,7 @@ static void cn_rx_skb(struct sk_buff *__skb) skb = skb_get(__skb); if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(struct cn_msg) || skb->len < nlh->nlmsg_len || @@ -448,7 +448,7 @@ static int __devinit cn_init(void) dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, - dev->input, THIS_MODULE); + dev->input, NULL, THIS_MODULE); if (!dev->nls) return -EIO; diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index 03e44b337eb..a364003ba47 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -834,7 +834,7 @@ static inline u16 ether1394_type_trans(struct sk_buff *skb, struct eth1394hdr *eth; unsigned char *rawp; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull (skb, ETH1394_HLEN); eth = eth1394_hdr(skb); @@ -1668,7 +1668,7 @@ static int ether1394_tx (struct sk_buff *skb, struct net_device *dev) if (memcmp(eth->h_dest, dev->broadcast, ETH1394_ALEN) == 0 || proto == htons(ETH_P_ARP) || (proto == htons(ETH_P_IP) && - IN_MULTICAST(ntohl(skb->nh.iph->daddr)))) { + IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) { tx_type = ETH1394_GASP; dest_node = LOCAL_BUS | ALL_NODES; max_payload = priv->bc_maxpayload - ETHER1394_GASP_OVERHEAD; diff --git a/drivers/ieee1394/eth1394.h b/drivers/ieee1394/eth1394.h index c45cbff9138..1e835653514 100644 --- a/drivers/ieee1394/eth1394.h +++ b/drivers/ieee1394/eth1394.h @@ -90,7 +90,7 @@ struct eth1394hdr { static inline struct eth1394hdr *eth1394_hdr(const struct sk_buff *skb) { - return (struct eth1394hdr *)skb->mac.raw; + return (struct eth1394hdr *)skb_mac_header(skb); } #endif diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 13efd417034..6edfecf1be7 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved. + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. 
* @@ -31,7 +31,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $ */ #include <linux/dma-mapping.h> #include <rdma/ib_cache.h> @@ -668,7 +667,7 @@ static void build_smp_wc(struct ib_qp *qp, static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr) { - int ret; + int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; unsigned long flags; struct ib_mad_local_private *local; @@ -688,14 +687,15 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, */ if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && - !smi_handle_dr_smp_send(smp, device->node_type, port_num)) { + smi_handle_dr_smp_send(smp, device->node_type, port_num) == + IB_SMI_DISCARD) { ret = -EINVAL; printk(KERN_ERR PFX "Invalid directed route\n"); goto out; } + /* Check to post send on QP or process locally */ - ret = smi_check_local_smp(smp, device); - if (!ret) + if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD) goto out; local = kmalloc(sizeof *local, GFP_ATOMIC); @@ -1874,18 +1874,22 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (!smi_handle_dr_smp_recv(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num, - port_priv->device->phys_port_cnt)) + if (smi_handle_dr_smp_recv(&recv->mad.smp, + port_priv->device->node_type, + port_priv->port_num, + port_priv->device->phys_port_cnt) == + IB_SMI_DISCARD) goto out; - if (!smi_check_forward_dr_smp(&recv->mad.smp)) + + if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL) goto local; - if (!smi_handle_dr_smp_send(&recv->mad.smp, - port_priv->device->node_type, - port_priv->port_num)) + + if (smi_handle_dr_smp_send(&recv->mad.smp, + port_priv->device->node_type, + port_priv->port_num) == IB_SMI_DISCARD) goto out; - if (!smi_check_local_smp(&recv->mad.smp, port_priv->device)) + + if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD) goto out; } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 68db633711c..9a7eaadb168 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -57,6 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL"); struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; + u8 src_path_mask; }; struct ib_sa_port { @@ -380,6 +381,7 @@ static void update_sm_ah(struct work_struct *work) } kref_init(&new_ah->ref); + new_ah->src_path_mask = (1 << port_attr.lmc) - 1; memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; @@ -460,6 +462,25 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) } EXPORT_SYMBOL(ib_sa_cancel_query); +static u8 get_src_path_mask(struct ib_device *device, u8 port_num) +{ + struct ib_sa_device *sa_dev; + struct ib_sa_port *port; + unsigned long flags; + u8 src_path_mask; + + sa_dev = ib_get_client_data(device, &sa_client); + if (!sa_dev) + return 0x7f; + + port = &sa_dev->port[port_num - sa_dev->start_port]; + spin_lock_irqsave(&port->ah_lock, flags); + src_path_mask = port->sm_ah ? 
port->sm_ah->src_path_mask : 0x7f; + spin_unlock_irqrestore(&port->ah_lock, flags); + + return src_path_mask; +} + int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) { @@ -469,7 +490,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); ah_attr->sl = rec->sl; - ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f; + ah_attr->src_path_bits = be16_to_cpu(rec->slid) & + get_src_path_mask(device, port_num); ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c index 54b81e17ad5..2bca753eb62 100644 --- a/drivers/infiniband/core/smi.c +++ b/drivers/infiniband/core/smi.c @@ -3,7 +3,7 @@ * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. - * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -34,7 +34,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: smi.c 1389 2004-12-27 22:56:47Z roland $ */ #include <rdma/ib_smi.h> @@ -44,9 +43,8 @@ * Fixup a directed route SMP for sending * Return 0 if the SMP should be discarded */ -int smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, - int port_num) +enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, int port_num) { u8 hop_ptr, hop_cnt; @@ -59,18 +57,18 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, if (hop_cnt && hop_ptr == 0) { smp->hop_ptr++; return (smp->initial_path[smp->hop_ptr] == - port_num); + port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:2 */ if (hop_ptr && hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) - return 0; + return IB_SMI_DISCARD; /* smp->return_path set when received */ smp->hop_ptr++; return (smp->initial_path[smp->hop_ptr] == - port_num); + port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ @@ -78,29 +76,30 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, /* smp->return_path set when received */ smp->hop_ptr++; return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE); + smp->dr_dlid == IB_LID_PERMISSIVE ? + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- Fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1); + return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ if (hop_cnt && hop_ptr == hop_cnt + 1) { smp->hop_ptr--; return (smp->return_path[smp->hop_ptr] == - port_num); + port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) - return 0; + return IB_SMI_DISCARD; smp->hop_ptr--; return (smp->return_path[smp->hop_ptr] == - port_num); + port_num ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- at the end of the DR segment of path */ @@ -108,15 +107,16 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, smp->hop_ptr--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_slid == IB_LID_PERMISSIVE); + smp->dr_slid == IB_LID_PERMISSIVE ? + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ if (hop_ptr == 0) - return 1; + return IB_SMI_HANDLE; /* C14-13:5 -- Check for unreasonable hop pointer */ - return 0; + return IB_SMI_DISCARD; } } @@ -124,10 +124,8 @@ int smi_handle_dr_smp_send(struct ib_smp *smp, * Adjust information for a received SMP * Return 0 if the SMP should be dropped */ -int smi_handle_dr_smp_recv(struct ib_smp *smp, - u8 node_type, - int port_num, - int phys_port_cnt) +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, + int port_num, int phys_port_cnt) { u8 hop_ptr, hop_cnt; @@ -138,16 +136,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, if (!ib_get_smp_direction(smp)) { /* C14-9:1 -- sender should have incremented hop_ptr */ if (hop_cnt && hop_ptr == 0) - return 0; + return IB_SMI_DISCARD; /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) - return 0; + return IB_SMI_DISCARD; smp->return_path[hop_ptr] = port_num; /* smp->hop_ptr updated when sending */ - return (smp->initial_path[hop_ptr+1] <= phys_port_cnt); + return (smp->initial_path[hop_ptr+1] <= phys_port_cnt ? + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ @@ -157,12 +156,13 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, /* smp->hop_ptr updated when sending */ return (node_type == RDMA_NODE_IB_SWITCH || - smp->dr_dlid == IB_LID_PERMISSIVE); + smp->dr_dlid == IB_LID_PERMISSIVE ? + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- fail unreasonable hop pointer */ - return (hop_ptr == hop_cnt + 1); + return (hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { @@ -170,16 +170,17 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, if (hop_cnt && hop_ptr == hop_cnt + 1) { smp->hop_ptr--; return (smp->return_path[smp->hop_ptr] == - port_num); + port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) { if (node_type != RDMA_NODE_IB_SWITCH) - return 0; + return IB_SMI_DISCARD; /* smp->hop_ptr updated when sending */ - return (smp->return_path[hop_ptr-1] <= phys_port_cnt); + return (smp->return_path[hop_ptr-1] <= phys_port_cnt ? + IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- We're at the end of the DR segment of path */ @@ -187,23 +188,20 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp, if (smp->dr_slid == IB_LID_PERMISSIVE) { /* giving SMP to SM - update hop_ptr */ smp->hop_ptr--; - return 1; + return IB_SMI_HANDLE; } /* smp->hop_ptr updated when sending */ - return (node_type == RDMA_NODE_IB_SWITCH); + return (node_type == RDMA_NODE_IB_SWITCH ? + IB_SMI_HANDLE: IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ /* C14-13:5 -- Check for unreasonable hop pointer */ - return (hop_ptr == 0); + return (hop_ptr == 0 ? 
IB_SMI_HANDLE : IB_SMI_DISCARD); } } -/* - * Return 1 if the received DR SMP should be forwarded to the send queue - * Return 0 if the SMP should be completed up the stack - */ -int smi_check_forward_dr_smp(struct ib_smp *smp) +enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) { u8 hop_ptr, hop_cnt; @@ -213,23 +211,25 @@ int smi_check_forward_dr_smp(struct ib_smp *smp) if (!ib_get_smp_direction(smp)) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) - return 1; + return IB_SMI_SEND; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) - return (smp->dr_dlid == IB_LID_PERMISSIVE); + return (smp->dr_dlid == IB_LID_PERMISSIVE ? + IB_SMI_SEND : IB_SMI_LOCAL); /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ if (hop_ptr == hop_cnt + 1) - return 1; + return IB_SMI_SEND; } else { - /* C14-13:2 */ + /* C14-13:2 -- intermediate hop */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) - return 1; + return IB_SMI_SEND; /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) - return (smp->dr_slid != IB_LID_PERMISSIVE); + return (smp->dr_slid != IB_LID_PERMISSIVE ? + IB_SMI_SEND : IB_SMI_LOCAL); } - return 0; + return IB_SMI_LOCAL; } diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h index 3011bfd86dc..9a4b349efc3 100644 --- a/drivers/infiniband/core/smi.h +++ b/drivers/infiniband/core/smi.h @@ -3,7 +3,7 @@ * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. - * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,7 +33,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * - * $Id: smi.h 1389 2004-12-27 22:56:47Z roland $ */ #ifndef __SMI_H_ @@ -41,26 +40,33 @@ #include <rdma/ib_smi.h> -int smi_handle_dr_smp_recv(struct ib_smp *smp, - u8 node_type, - int port_num, - int phys_port_cnt); -extern int smi_check_forward_dr_smp(struct ib_smp *smp); -extern int smi_handle_dr_smp_send(struct ib_smp *smp, - u8 node_type, - int port_num); +enum smi_action { + IB_SMI_DISCARD, + IB_SMI_HANDLE +}; + +enum smi_forward_action { + IB_SMI_LOCAL, /* SMP should be completed up the stack */ + IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */ +}; + +enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type, + int port_num, int phys_port_cnt); +extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); +extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, + u8 node_type, int port_num); /* * Return 1 if the SMP should be handled by the local SMA/SM via process_mad */ -static inline int smi_check_local_smp(struct ib_smp *smp, - struct ib_device *device) +static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, + struct ib_device *device) { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ return ((device->process_mad && !ib_get_smp_direction(smp) && - (smp->hop_ptr == smp->hop_cnt + 1))); + (smp->hop_ptr == smp->hop_cnt + 1)) ? 
+ IB_SMI_HANDLE : IB_SMI_DISCARD); } - #endif /* __SMI_H_ */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 000c086bf2e..08c299ebf4a 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -683,6 +683,7 @@ int ib_device_register_sysfs(struct ib_device *device) class_dev->class = &ib_class; class_dev->class_data = device; + class_dev->dev = device->dma_device; strlcpy(class_dev->class_id, device->name, BUS_ID_SIZE); INIT_LIST_HEAD(&device->port_list); diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index ee51d79a7ad..2586a3ee8eb 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -407,29 +407,18 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { + mutex_unlock(&file->file_mutex); - if (file->filp->f_flags & O_NONBLOCK) { - result = -EAGAIN; - break; - } + if (file->filp->f_flags & O_NONBLOCK) + return -EAGAIN; - if (signal_pending(current)) { - result = -ERESTARTSYS; - break; - } + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->events))) + return -ERESTARTSYS; - prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); - - mutex_unlock(&file->file_mutex); - schedule(); mutex_lock(&file->file_mutex); - - finish_wait(&file->poll_wait, &wait); } - if (result) - goto done; - uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); if (ib_ucm_new_cm_id(uevent->resp.event)) { diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index c859134c1da..53b4c94a7eb 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -306,26 +306,18 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, mutex_lock(&file->mut); while (list_empty(&file->event_list)) { - if (file->filp->f_flags & O_NONBLOCK) { - ret = -EAGAIN; - break; - } + mutex_unlock(&file->mut); - if (signal_pending(current)) { - ret = -ERESTARTSYS; - break; - } + if (file->filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->event_list))) + return -ERESTARTSYS; - prepare_to_wait(&file->poll_wait, &wait, TASK_INTERRUPTIBLE); - mutex_unlock(&file->mut); - schedule(); mutex_lock(&file->mut); - finish_wait(&file->poll_wait, &wait); } - if (ret) - goto done; - uevent = list_entry(file->event_list.next, struct ucma_event, list); if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index c069ebeba8e..8199b83052a 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -135,7 +135,7 @@ static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS]; -static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2); +static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); @@ -231,12 +231,17 @@ static void recv_handler(struct ib_mad_agent *agent, packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { - /* XXX parse GRH */ - packet->mad.hdr.gid_index = 0; - packet->mad.hdr.hop_limit = 0; - packet->mad.hdr.traffic_class = 0; - memset(packet->mad.hdr.gid, 0, 
16); - packet->mad.hdr.flow_label = 0; + struct ib_ah_attr ah_attr; + + ib_init_ah_from_wc(agent->device, agent->port_num, + mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, + &ah_attr); + + packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; + packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; + packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; + memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); + packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); } if (queue_packet(file, agent, packet)) @@ -473,6 +478,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); + ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 59243d9aedd..58bc272bd40 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem) } /* Setup the skb for reuse since we're dropping this pkt */ - elem->skb->tail = elem->skb->data = elem->skb->head; + elem->skb->data = elem->skb->head; + skb_reset_tail_pointer(elem->skb); /* Zero out the rxp hdr in the sk_buff */ memset(elem->skb->data, 0, sizeof(*rxp_hdr)); @@ -521,9 +522,8 @@ static void c2_rx_interrupt(struct net_device *netdev) * "sizeof(struct c2_rxp_hdr)". */ skb->data += sizeof(*rxp_hdr); - skb->tail = skb->data + buflen; + skb_set_tail_pointer(skb, buflen); skb->len = buflen; - skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); netif_rx(skb); diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index fef97275291..607c09bf764 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -796,7 +796,6 @@ int c2_register_device(struct c2_dev *dev) memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6); dev->ibdev.phys_port_cnt = 1; dev->ibdev.dma_device = &dev->pcidev->dev; - dev->ibdev.class_dev.dev = &dev->pcidev->dev; dev->ibdev.query_device = c2_query_device; dev->ibdev.query_port = c2_query_port; dev->ibdev.modify_port = c2_modify_port; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 2d2de9b8b72..3b4b0acd707 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -477,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) BUG_ON(skb_cloned(skb)); mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb->end) { + if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { kfree_skb(skb); skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); if (!skb) { @@ -507,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -559,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) skb_get(skb); skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, 
arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); @@ -610,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) */ skb_get(skb); set_arp_failure_handler(skb, arp_failure_discard); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); @@ -821,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* * copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -940,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) /* * Copy the new data into our accumulation buffer. */ - memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len); + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); ep->mpa_pkt_len += skb->len; /* @@ -1619,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) PDBG("%s ep %p\n", __FUNCTION__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len); - memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len); + skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, + skb->len); ep->com.qp->attr.terminate_msg_len = skb->len; ep->com.qp->attr.is_terminate_local = 0; return CPL_RET_BUF_DONE; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 24e0df04f7d..af28a317016 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1108,7 +1108,6 @@ int iwch_register_device(struct iwch_dev *dev) memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); - dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; dev->ibdev.modify_port = iwch_modify_port; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 82ded44c6ce..10fb8fbafa0 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -106,6 +106,7 @@ struct ehca_shca { struct ehca_mr *maxmr; struct ehca_pd *pd; struct h_galpas galpas; + struct mutex modify_mutex; }; struct ehca_pd { diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 30eb45df9f0..32b55a4f0e5 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -147,6 +147,7 @@ int ehca_query_port(struct ib_device *ibdev, break; } + props->port_cap_flags = rblock->capability_mask; props->gid_tbl_len = rblock->gid_tbl_len; props->max_msg_sz = rblock->max_msg_sz; props->bad_pkey_cntr = rblock->bad_pkey_cntr; @@ -236,10 +237,60 @@ query_gid1: return ret; } +const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + int ehca_modify_port(struct ib_device *ibdev, 
u8 port, int port_modify_mask, struct ib_port_modify *props) { - /* Not implemented yet */ - return -EFAULT; + int ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed hret=%lx", hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 059da9628bb..3b23d677cb8 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -587,6 +587,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); shca->ibmebus_dev = dev; shca->ipz_hca_handle.handle = *handle; diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 3fb46e67df8..b564fcd3b28 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -70,6 +70,10 @@ #define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) #define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + /* direct access qp controls */ #define DAQP_CTRL_ENABLE 0x01 #define DAQP_CTRL_SEND_COMP 0x20 @@ -364,6 +368,26 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, return ret; } +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 587ebd47095..2869f7dd619 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -85,6 +85,10 @@ u64 
hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, struct hipz_query_port *query_port_response_block); +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask); + u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock); diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index 54139d39818..10c008f22ba 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -78,6 +78,8 @@ #define IPATH_IB_LINKINIT 3 #define IPATH_IB_LINKDOWN_SLEEP 4 #define IPATH_IB_LINKDOWN_DISABLE 5 +#define IPATH_IB_LINK_LOOPBACK 6 /* enable local loopback */ +#define IPATH_IB_LINK_EXTERNAL 7 /* normal, disable local loopback */ /* * stats maintained by the driver. For now, at least, this is global @@ -316,11 +318,17 @@ struct ipath_base_info { /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; - /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + /* shared memory pages for subports if port is shared */ __u64 spi_subport_uregbase; __u64 spi_subport_rcvegrbuf; __u64 spi_subport_rcvhdr_base; + /* shared memory page for hardware port if it is shared */ + __u64 spi_port_uregbase; + __u64 spi_port_rcvegrbuf; + __u64 spi_port_rcvhdr_base; + __u64 spi_port_rcvhdr_tailaddr; + } __attribute__ ((aligned(8))); @@ -344,7 +352,7 @@ struct ipath_base_info { * may not be implemented; the user code must deal with this if it * cares, or it must abort after initialization reports the difference. 
*/ -#define IPATH_USER_SWMINOR 3 +#define IPATH_USER_SWMINOR 5 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -418,11 +426,14 @@ struct ipath_user_info { #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ -#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ +#define __IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes (for old user code) */ #define IPATH_CMD_ASSIGN_PORT 23 /* allocate HCA and port */ #define IPATH_CMD_USER_INIT 24 /* set up userspace */ +#define IPATH_CMD_UNUSED_1 25 +#define IPATH_CMD_UNUSED_2 26 +#define IPATH_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ -#define IPATH_CMD_MAX 24 +#define IPATH_CMD_MAX 27 struct ipath_port_info { __u32 num_active; /* number of active units */ @@ -430,7 +441,7 @@ struct ipath_port_info { __u16 port; /* port on unit assigned to caller */ __u16 subport; /* subport on unit assigned to caller */ __u16 num_ports; /* number of ports available on unit */ - __u16 num_subports; /* number of subport slaves opened on port */ + __u16 num_subports; /* number of subports opened on port */ }; struct ipath_tid_info { diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index 87462e0cb4d..ea78e6dddc9 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -76,7 +76,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) } return; } - wc->queue[head] = *entry; + wc->queue[head].wr_id = entry->wr_id; + wc->queue[head].status = entry->status; + wc->queue[head].opcode = entry->opcode; + wc->queue[head].vendor_err = entry->vendor_err; + wc->queue[head].byte_len = entry->byte_len; + wc->queue[head].imm_data = (__u32 __force)entry->imm_data; + wc->queue[head].qp_num = entry->qp->qp_num; + wc->queue[head].src_qp = entry->src_qp; + wc->queue[head].wc_flags = entry->wc_flags; + wc->queue[head].pkey_index = entry->pkey_index; + wc->queue[head].slid = entry->slid; + wc->queue[head].sl = entry->sl; + wc->queue[head].dlid_path_bits = entry->dlid_path_bits; + wc->queue[head].port_num = entry->port_num; wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || @@ -122,9 +135,30 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (tail > (u32) cq->ibcq.cqe) tail = (u32) cq->ibcq.cqe; for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + struct ipath_qp *qp; + if (tail == wc->head) break; - *entry = wc->queue[tail]; + + qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table, + wc->queue[tail].qp_num); + entry->qp = &qp->ibqp; + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + entry->wr_id = wc->queue[tail].wr_id; + entry->status = wc->queue[tail].status; + entry->opcode = wc->queue[tail].opcode; + entry->vendor_err = wc->queue[tail].vendor_err; + entry->byte_len = wc->queue[tail].byte_len; + entry->imm_data = wc->queue[tail].imm_data; + entry->src_qp = wc->queue[tail].src_qp; + entry->wc_flags = wc->queue[tail].wc_flags; + entry->pkey_index = wc->queue[tail].pkey_index; + entry->slid = wc->queue[tail].slid; + entry->sl = wc->queue[tail].sl; + entry->dlid_path_bits = wc->queue[tail].dlid_path_bits; + entry->port_num = wc->queue[tail].port_num; if (tail >= cq->ibcq.cqe) tail = 0; else diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h index 
df69f0d80b8..42bfbdb0d3e 100644 --- a/drivers/infiniband/hw/ipath/ipath_debug.h +++ b/drivers/infiniband/hw/ipath/ipath_debug.h @@ -57,6 +57,7 @@ #define __IPATH_PROCDBG 0x100 /* print mmap/nopage stuff, not using VDBG any more */ #define __IPATH_MMDBG 0x200 +#define __IPATH_ERRPKTDBG 0x400 #define __IPATH_USER_SEND 0x1000 /* use user mode send */ #define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ #define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 0f13a2182cc..63e8368b0e9 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -296,7 +296,7 @@ static int ipath_diag_open(struct inode *in, struct file *fp) } fp->private_data = dd; - ipath_diag_inuse = 1; + ipath_diag_inuse = -2; diag_set_link = 0; ret = 0; @@ -461,6 +461,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if (ipath_diag_inuse < 1 && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/ipath_diag* */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit reads */ ret = ipath_read_umem32(dd, data, kreg_base + *off, count); @@ -470,6 +472,8 @@ static ssize_t ipath_diag_read(struct file *fp, char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -2) + ipath_diag_inuse++; } return ret; @@ -489,6 +493,9 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, else if ((count % 4) || (*off % 4)) /* address or length is not 32-bit aligned, hence invalid */ ret = -EINVAL; + else if ((ipath_diag_inuse == -1 && (*off || count != 8)) || + ipath_diag_inuse == -2) /* read qw off 0, write qw off 0 */ + ret = -EINVAL; /* before any other write allowed */ else if ((count % 8) || (*off % 8)) /* address or length not 64-bit aligned; do 32-bit writes */ ret = ipath_write_umem32(dd, kreg_base + *off, data, count); @@ -498,6 +505,8 @@ static ssize_t ipath_diag_write(struct file *fp, const char __user *data, if (ret >= 0) { *off += count; ret = count; + if (ipath_diag_inuse == -1) + ipath_diag_inuse = 1; /* all read/write OK now */ } return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index ae7f21a0cdc..e3a22320971 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -390,15 +390,23 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. 
*/ switch (ent->device) { -#ifdef CONFIG_HT_IRQ case PCI_DEVICE_ID_INFINIPATH_HT: +#ifdef CONFIG_HT_IRQ ipath_init_iba6110_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if " + "CONFIG_HT_IRQ is not enabled\n", ent->device); + return -ENODEV; #endif -#ifdef CONFIG_PCI_MSI case PCI_DEVICE_ID_INFINIPATH_PE800: +#ifdef CONFIG_PCI_MSI ipath_init_iba6120_funcs(dd); break; +#else + ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if " + "CONFIG_PCI_MSI is not enabled\n", ent->device); + return -ENODEV; #endif default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " @@ -486,7 +494,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ if (ret) - goto bail_iounmap; + goto bail_irqsetup; ret = ipath_enable_wc(dd); @@ -505,6 +513,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail; +bail_irqsetup: + if (pdev->irq) free_irq(pdev->irq, dd); + bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -525,8 +536,6 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) { int port; - ipath_shutdown_device(dd); - if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; @@ -594,8 +603,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", dd->ipath_pageshadow); - vfree(dd->ipath_pageshadow); + tmpp = dd->ipath_pageshadow; dd->ipath_pageshadow = NULL; + vfree(tmpp); } /* @@ -622,6 +632,12 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); + /* + * disable the IB link early, to be sure no new packets arrive, which + * complicates the shutdown process + */ + ipath_shutdown_device(dd); + if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); @@ -754,9 +770,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; } -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) +/* + * Decode the error status into strings, deciding whether to always + * print it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string. + */ +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err) { + int iserr = 1; *buf = '\0'; + if (err & INFINIPATH_E_PKTERRS) { + if (!(err & ~INFINIPATH_E_PKTERRS)) + iserr = 0; // if only packet errors.
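A minimal userspace sketch of the convention ipath_decode_err() adopts here: decode an error bitmask into a string buffer and return nonzero only when "real" (non-packet) errors are present, so the caller can choose a log severity. All names below are illustrative stand-ins, not the driver's:

	#include <stdio.h>
	#include <string.h>

	#define ERR_PKT_CRC	0x1	/* "normal" packet error */
	#define ERR_PKT_VCRC	0x2	/* "normal" packet error */
	#define ERR_HW_RESET	0x4	/* "real" error */
	#define ERR_PKTERRS	(ERR_PKT_CRC | ERR_PKT_VCRC)

	static int decode_err(char *buf, size_t blen, unsigned long err)
	{
		int iserr = 1;

		*buf = '\0';
		if ((err & ERR_PKTERRS) && !(err & ~ERR_PKTERRS))
			iserr = 0;	/* only packet errors were set */
		if (err & ERR_PKT_CRC)
			strncat(buf, "CRC ", blen - strlen(buf) - 1);
		if (err & ERR_PKT_VCRC)
			strncat(buf, "VCRC ", blen - strlen(buf) - 1);
		if (err & ERR_HW_RESET)
			strncat(buf, "reset ", blen - strlen(buf) - 1);
		return iserr;
	}

	int main(void)
	{
		char msg[64];

		/* the caller picks severity from the return value, the way
		 * handle_errors() later in this patch chooses between
		 * ipath_dev_err() and dev_info() */
		if (decode_err(msg, sizeof(msg), ERR_PKT_CRC | ERR_HW_RESET))
			fprintf(stderr, "error: %s\n", msg);
		else
			printf("packet problems: %s\n", msg);
		return 0;
	}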
+ if (ipath_debug & __IPATH_ERRPKTDBG) { + if (err & INFINIPATH_E_REBP) + strlcat(buf, "EBP ", blen); + if (err & INFINIPATH_E_RVCRC) + strlcat(buf, "VCRC ", blen); + if (err & INFINIPATH_E_RICRC) { + strlcat(buf, "CRC ", blen); + // clear for check below, so only once + err &= INFINIPATH_E_RICRC; + } + if (err & INFINIPATH_E_RSHORTPKTLEN) + strlcat(buf, "rshortpktlen ", blen); + if (err & INFINIPATH_E_SDROPPEDDATAPKT) + strlcat(buf, "sdroppeddatapkt ", blen); + if (err & INFINIPATH_E_SPKTLEN) + strlcat(buf, "spktlen ", blen); + } + if ((err & INFINIPATH_E_RICRC) && + !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } if (err & INFINIPATH_E_RHDRLEN) strlcat(buf, "rhdrlen ", blen); if (err & INFINIPATH_E_RBADTID) @@ -767,12 +816,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "rhdr ", blen); if (err & INFINIPATH_E_RLONGPKTLEN) strlcat(buf, "rlongpktlen ", blen); - if (err & INFINIPATH_E_RSHORTPKTLEN) - strlcat(buf, "rshortpktlen ", blen); if (err & INFINIPATH_E_RMAXPKTLEN) strlcat(buf, "rmaxpktlen ", blen); if (err & INFINIPATH_E_RMINPKTLEN) strlcat(buf, "rminpktlen ", blen); + if (err & INFINIPATH_E_SMINPKTLEN) + strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_RFORMATERR) strlcat(buf, "rformaterr ", blen); if (err & INFINIPATH_E_RUNSUPVL) @@ -781,32 +830,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "runexpchar ", blen); if (err & INFINIPATH_E_RIBFLOW) strlcat(buf, "ribflow ", blen); - if (err & INFINIPATH_E_REBP) - strlcat(buf, "EBP ", blen); if (err & INFINIPATH_E_SUNDERRUN) strlcat(buf, "sunderrun ", blen); if (err & INFINIPATH_E_SPIOARMLAUNCH) strlcat(buf, "spioarmlaunch ", blen); if (err & INFINIPATH_E_SUNEXPERRPKTNUM) strlcat(buf, "sunexperrpktnum ", blen); - if (err & INFINIPATH_E_SDROPPEDDATAPKT) - strlcat(buf, "sdroppeddatapkt ", blen); if (err & INFINIPATH_E_SDROPPEDSMPPKT) strlcat(buf, "sdroppedsmppkt ", blen); if (err & INFINIPATH_E_SMAXPKTLEN) strlcat(buf, "smaxpktlen ", blen); - if (err & INFINIPATH_E_SMINPKTLEN) - strlcat(buf, "sminpktlen ", blen); if (err & INFINIPATH_E_SUNSUPVL) strlcat(buf, "sunsupVL ", blen); - if (err & INFINIPATH_E_SPKTLEN) - strlcat(buf, "spktlen ", blen); if (err & INFINIPATH_E_INVALIDADDR) strlcat(buf, "invalidaddr ", blen); - if (err & INFINIPATH_E_RICRC) - strlcat(buf, "CRC ", blen); - if (err & INFINIPATH_E_RVCRC) - strlcat(buf, "VCRC ", blen); if (err & INFINIPATH_E_RRCVEGRFULL) strlcat(buf, "rcvegrfull ", blen); if (err & INFINIPATH_E_RRCVHDRFULL) @@ -819,6 +856,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) strlcat(buf, "hardware ", blen); if (err & INFINIPATH_E_RESET) strlcat(buf, "reset ", blen); +done: + return iserr; } /** @@ -1662,6 +1701,22 @@ int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) lstate = IPATH_LINKACTIVE; break; + case IPATH_IB_LINK_LOOPBACK: + dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); + dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + + case IPATH_IB_LINK_EXTERNAL: + dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n"); + dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, + dd->ipath_ibcctrl); + ret = 0; + goto bail; // no state change to wait for + default: ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); ret = -EINVAL; @@ -1765,29 
+1820,6 @@ int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc) return 0; } -/** - * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register - * @dd: the infinipath device - * @regno: the register number to read - * @port: the port containing the register - * - * Registers that vary with the chip implementation constants (port) - * use this routine. - */ -u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno, - unsigned port) -{ - u16 where; - - if (port < dd->ipath_portcnt && - (regno == dd->ipath_kregs->kr_rcvhdraddr || - regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) - where = regno + port; - else - where = -1; - - return ipath_read_kreg64(dd, where); -} /** * ipath_write_kreg_port - write a device's per-port 64-bit kernel register @@ -1973,7 +2005,8 @@ static int __init infinipath_init(void) { int ret; - ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); + if (ipath_debug & __IPATH_DBG) + printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); /* * These must be called before the driver is registered with diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index a4019a6b756..030185f90ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) } else memcpy(dd->ipath_serial, ifp->if_serial, sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + ipath_dev_err(dd, "Board SN %s did not pass functional " + "test: %s\n", dd->ipath_serial, + ifp->if_comment); ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", (unsigned long long) be64_to_cpu(dd->ipath_guid)); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 5d64ff87529..1272aaf2a78 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 QLogic, Inc. All rights reserved. + * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,12 +41,6 @@ #include "ipath_kernel.h" #include "ipath_common.h" -/* - * mmap64 doesn't allow all 64 bits for 32-bit applications - * so only use the low 43 bits. - */ -#define MMAP64_MASK 0x7FFFFFFFFFFUL - static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -63,6 +57,24 @@ static const struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; +/* + * Convert kernel virtual addresses to physical addresses so they don't + * potentially conflict with the chip addresses used as mmap offsets. + * It doesn't really matter what mmap offset we use as long as we can + * interpret it correctly. 
+ */ +static u64 cvt_kvaddr(void *p) +{ + struct page *page; + u64 paddr = 0; + + page = vmalloc_to_page(p); + if (page) + paddr = page_to_pfn(page) << PAGE_SHIFT; + + return paddr; +} + static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { @@ -87,7 +99,7 @@ static int ipath_get_base_info(struct file *fp, sz = sizeof(*kinfo); /* If port sharing is not requested, allow the old size structure */ if (!shared) - sz -= 3 * sizeof(u64); + sz -= 7 * sizeof(u64); if (ubase_size < sz) { ipath_cdbg(PROC, "Base size %zu, need %zu (version mismatch?)\n", @@ -165,24 +177,41 @@ static int ipath_get_base_info(struct file *fp, kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * (dd->ipath_pbufsport - kinfo->spi_piocnt); - kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + - dd->ipath_palign * pd->port_port; } else { unsigned slave = subport_fp(fp) - 1; kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; kinfo->spi_piobufbase = (u64) pd->port_piobufs + dd->ipath_palign * kinfo->spi_piocnt * slave; - kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + - PAGE_SIZE * slave) & MMAP64_MASK; + } + if (shared) { + kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs; + kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base; + kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr; - kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * slave) & MMAP64_MASK; - kinfo->spi_rcvhdr_tailaddr = - (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; - kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + - dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & - MMAP64_MASK; + kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport_fp(fp)); + + kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport_fp(fp)); + kinfo->spi_rcvhdr_tailaddr = 0; + kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf + + pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * + subport_fp(fp)); + + kinfo->spi_subport_uregbase = + cvt_kvaddr(pd->subport_uregbase); + kinfo->spi_subport_rcvegrbuf = + cvt_kvaddr(pd->subport_rcvegrbuf); + kinfo->spi_subport_rcvhdr_base = + cvt_kvaddr(pd->subport_rcvhdr_base); + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, kinfo->spi_runtime_flags, + (unsigned long long) kinfo->spi_subport_uregbase, + (unsigned long long) kinfo->spi_subport_rcvegrbuf, + (unsigned long long) kinfo->spi_subport_rcvhdr_base); } kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / @@ -199,20 +228,10 @@ static int ipath_get_base_info(struct file *fp, if (master) { kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; - kinfo->spi_subport_uregbase = - (u64) pd->subport_uregbase & MMAP64_MASK; - kinfo->spi_subport_rcvegrbuf = - (u64) pd->subport_rcvegrbuf & MMAP64_MASK; - kinfo->spi_subport_rcvhdr_base = - (u64) pd->subport_rcvhdr_base & MMAP64_MASK; - ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", - kinfo->spi_port, kinfo->spi_runtime_flags, - (unsigned long long) kinfo->spi_subport_uregbase, - (unsigned long long) kinfo->spi_subport_rcvegrbuf, - (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) + sz = (ubase_size < sizeof(*kinfo)) ? 
ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) ret = -EFAULT; bail: @@ -1132,67 +1151,55 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, struct ipath_devdata *dd; void *addr; size_t size; - int ret; + int ret = 0; /* If the port is not shared, all addresses should be physical */ - if (!pd->port_subport_cnt) { - ret = -EINVAL; + if (!pd->port_subport_cnt) goto bail; - } dd = pd->port_dd; size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; /* - * Master has all the slave uregbase, rcvhdrq, and - * rcvegrbufs mmapped. + * Each process has all the subport uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. */ - if (subport == 0) { - unsigned num_slaves = pd->port_subport_cnt - 1; - - if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { - addr = pd->subport_uregbase; - size = PAGE_SIZE * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base; - size = pd->port_rcvhdrq_size * num_slaves; - } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & - MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf; - size *= num_slaves; - } else { - ret = -EINVAL; - goto bail; - } - } else if (pgaddr == (((u64) pd->subport_uregbase + - PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); - size = PAGE_SIZE; - } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1)) & - MMAP64_MASK)) { - addr = pd->subport_rcvhdr_base + - pd->port_rcvhdrq_size * (subport - 1); - size = pd->port_rcvhdrq_size; - } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + - size * (subport - 1)) & MMAP64_MASK)) { - addr = pd->subport_rcvegrbuf + size * (subport - 1); - /* rcvegrbufs are read-only on the slave */ - if (vma->vm_flags & VM_WRITE) { - dev_info(&dd->pcidev->dev, - "Can't map eager buffers as " - "writable (flags=%lx)\n", vma->vm_flags); - ret = -EPERM; - goto bail; - } - /* - * Don't allow permission to later change to writeable - * with mprotect. - */ - vma->vm_flags &= ~VM_MAYWRITE; + if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) { + addr = pd->subport_rcvegrbuf; + size *= pd->port_subport_cnt; + } else if (pgaddr == cvt_kvaddr(pd->subport_uregbase + + PAGE_SIZE * subport)) { + addr = pd->subport_uregbase + PAGE_SIZE * subport; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * subport; + size = pd->port_rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf + + size * subport)) { + addr = pd->subport_rcvegrbuf + size * subport; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. 
+ */ + vma->vm_flags &= ~VM_MAYWRITE; } else { - ret = -EINVAL; goto bail; } len = vma->vm_end - vma->vm_start; @@ -1205,7 +1212,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; - ret = 0; + ret = 1; bail: return ret; @@ -1265,19 +1272,20 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) * Check for kernel virtual addresses first, anything else must * match a HW or memory address. */ - if (pgaddr >= (1ULL<<40)) { - ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + if (ret) { + if (ret > 0) + ret = 0; goto bail; } + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; if (!pd->port_subport_cnt) { /* port is not shared */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = dd->ipath_pbufsport; piobufs = pd->port_piobufs; } else if (!subport_fp(fp)) { /* caller is the master */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + (dd->ipath_pbufsport % pd->port_subport_cnt); piobufs = pd->port_piobufs + @@ -1286,7 +1294,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) unsigned slave = subport_fp(fp) - 1; /* caller is a slave */ - ureg = 0; piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } @@ -1300,9 +1307,6 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, (void *) dd->ipath_pioavailregs_dma, "pioavail registers"); - else if (subport_fp(fp)) - /* Subports don't mmap the physical receive buffers */ - ret = -EINVAL; else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); else if (pgaddr == (u64) pd->port_rcvhdrq_phys) @@ -1400,32 +1404,41 @@ static int init_subports(struct ipath_devdata *dd, const struct ipath_user_info *uinfo) { int ret = 0; - unsigned num_slaves; + unsigned num_subports; size_t size; - /* Old user binaries don't know about subports */ - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - goto bail; /* * If the user is requesting zero or one port, * skip the subport allocation. */ if (uinfo->spu_subport_cnt <= 1) goto bail; - if (uinfo->spu_subport_cnt > 4) { + + /* Old user binaries don't know about new subport implementation */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) { + dev_info(&dd->pcidev->dev, + "Mismatched user minor version (%d) and driver " + "minor version (%d) while port sharing. Ensure " + "that driver and library are from the same " + "release.\n", + (int) (uinfo->spu_userversion & 0xffff), + IPATH_USER_SWMINOR); + goto bail; + } + if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) { ret = -EINVAL; goto bail; } - num_slaves = uinfo->spu_subport_cnt - 1; - pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + num_subports = uinfo->spu_subport_cnt; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); if (!pd->subport_uregbase) { ret = -ENOMEM; goto bail; } /* Note: pd->port_rcvhdrq_size isn't initialized yet. 
*/ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * - sizeof(u32), PAGE_SIZE) * num_slaves; + sizeof(u32), PAGE_SIZE) * num_subports; pd->subport_rcvhdr_base = vmalloc(size); if (!pd->subport_rcvhdr_base) { ret = -ENOMEM; @@ -1434,7 +1447,7 @@ static int init_subports(struct ipath_devdata *dd, pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * - num_slaves); + num_subports); if (!pd->subport_rcvegrbuf) { ret = -ENOMEM; goto bail_rhdr; @@ -1443,6 +1456,12 @@ static int init_subports(struct ipath_devdata *dd, pd->port_subport_cnt = uinfo->spu_subport_cnt; pd->port_subport_id = uinfo->spu_subport_id; pd->active_slaves = 1; + set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); + memset(pd->subport_rcvhdr_base, 0, size); + memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_subports); goto bail; bail_rhdr: @@ -1573,18 +1592,19 @@ static int find_best_unit(struct file *fp, */ if (!cpus_empty(current->cpus_allowed) && !cpus_full(current->cpus_allowed)) { - int ncpus = num_online_cpus(), curcpu = -1; + int ncpus = num_online_cpus(), curcpu = -1, nset = 0; for (i = 0; i < ncpus; i++) if (cpu_isset(i, current->cpus_allowed)) { ipath_cdbg(PROC, "%s[%u] affinity set for " - "cpu %d\n", current->comm, - current->pid, i); + "cpu %d/%d\n", current->comm, + current->pid, i, ncpus); curcpu = i; + nset++; } - if (curcpu != -1) { + if (curcpu != -1 && nset != ncpus) { if (npresent) { prefunit = curcpu / (ncpus / npresent); - ipath_dbg("%s[%u] %d chips, %d cpus, " + ipath_cdbg(PROC,"%s[%u] %d chips, %d cpus, " "%d cpus/chip, select unit %d\n", current->comm, current->pid, npresent, ncpus, ncpus / npresent, @@ -1764,11 +1784,17 @@ static int ipath_do_user_init(struct file *fp, const struct ipath_user_info *uinfo) { int ret; - struct ipath_portdata *pd; + struct ipath_portdata *pd = port_fp(fp); struct ipath_devdata *dd; u32 head32; - pd = port_fp(fp); + /* Subports don't need to initialize anything since master did it. 
*/ + if (subport_fp(fp)) { + ret = wait_event_interruptible(pd->port_wait, + !test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag)); + goto done; + } + dd = pd->port_dd; if (uinfo->spu_rcvhdrsize) { @@ -1826,6 +1852,11 @@ static int ipath_do_user_init(struct file *fp, dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); + /* Notify any waiting slaves */ + if (pd->port_subport_cnt) { + clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); + wake_up(&pd->port_wait); + } done: return ret; } @@ -2017,6 +2048,17 @@ static int ipath_get_slave_info(struct ipath_portdata *pd, return ret; } +static int ipath_force_pio_avail_update(struct ipath_devdata *dd) +{ + u64 reg = dd->ipath_sendctrl; + + clear_bit(IPATH_S_PIOBUFAVAILUPD, &reg); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, reg); + ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); + + return 0; +} + static ssize_t ipath_write(struct file *fp, const char __user *data, size_t count, loff_t *off) { @@ -2071,27 +2113,35 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, dest = &cmd.cmd.part_key; src = &ucmd->cmd.part_key; break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: copy = sizeof(cmd.cmd.slave_mask_addr); dest = &cmd.cmd.slave_mask_addr; src = &ucmd->cmd.slave_mask_addr; break; + case IPATH_CMD_PIOAVAILUPD: // force an update of PIOAvail reg + copy = 0; + src = NULL; + dest = NULL; + break; default: ret = -EINVAL; goto bail; } - if ((count - consumed) < copy) { - ret = -EINVAL; - goto bail; - } + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } - if (copy_from_user(dest, src, copy)) { - ret = -EFAULT; - goto bail; + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + + consumed += copy; } - consumed += copy; pd = port_fp(fp); if (!pd && cmd.type != __IPATH_CMD_USER_INIT && cmd.type != IPATH_CMD_ASSIGN_PORT) { @@ -2137,11 +2187,14 @@ static ssize_t ipath_write(struct file *fp, const char __user *data, case IPATH_CMD_SET_PART_KEY: ret = ipath_set_part_key(pd, cmd.cmd.part_key); break; - case IPATH_CMD_SLAVE_INFO: + case __IPATH_CMD_SLAVE_INFO: ret = ipath_get_slave_info(pd, (void __user *) (unsigned long) cmd.cmd.slave_mask_addr); break; + case IPATH_CMD_PIOAVAILUPD: + ret = ipath_force_pio_avail_update(pd->port_dd); + break; } if (ret >= 0) diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 99348254502..4171198fc20 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -43,6 +43,9 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_ht_setextled(struct ipath_devdata *, u64, u64); + + /* * This lists the InfiniPath registers, in the actual chip layout. * This structure should never be directly accessed.
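The subport changes above rest on a small handshake: init_subports() sets IPATH_PORT_MASTER_UNINIT, slaves block in ipath_do_user_init() until the bit clears, and the master clears it and wakes them once the one-time port setup is done. A minimal userspace analogue of that pattern, with pthread primitives standing in for wait_event_interruptible()/wake_up() (all names illustrative):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	static int master_uninit = 1;	/* plays the role of IPATH_PORT_MASTER_UNINIT */

	static void *slave(void *arg)
	{
		pthread_mutex_lock(&lock);
		while (master_uninit)	/* wait_event_interruptible(pd->port_wait, ...) */
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
		printf("slave: master finished init, proceeding\n");
		return arg;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, slave, NULL);
		/* ... master performs its one-time port initialization here ... */
		pthread_mutex_lock(&lock);
		master_uninit = 0;		/* clear_bit(IPATH_PORT_MASTER_UNINIT, ...) */
		pthread_cond_broadcast(&cond);	/* wake_up(&pd->port_wait) */
		pthread_mutex_unlock(&lock);
		pthread_join(t, NULL);
		return 0;
	}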
@@ -208,8 +211,8 @@ static const struct ipath_kregs ipath_ht_kregs = { .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port(), + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0) @@ -284,6 +287,14 @@ static const struct ipath_cregs ipath_ht_cregs = { #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 + +/* TID entries (memory), HT-only */ +#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ +#define INFINIPATH_RT_VALID 0x8000000000000000ULL +#define INFINIPATH_RT_ADDR_SHIFT 0 +#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL +#define INFINIPATH_RT_BUFSIZE_SHIFT 48 + /* * masks and bits that are different in different chips, or present only * in one @@ -402,6 +413,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) +#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ + << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) + +static int ipath_ht_txe_recover(struct ipath_devdata *); + /** * ipath_ht_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -450,13 +469,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, /* * make sure we get this much out, unless told to be quiet, + * it's a parity error we may recover from, * or it's occurred within the last 5 seconds */ - if ((hwerrs & ~(dd->ipath_lasthwerror | - ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || - (ipath_debug & __IPATH_VERBDBG)) + if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | + RXE_EAGER_PARITY)) || + (ipath_debug & __IPATH_VERBDBG)) dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " "(cleared)\n", (unsigned long long) hwerrs); dd->ipath_lasthwerror |= hwerrs; @@ -467,7 +485,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, (hwerrs & ~dd->ipath_hwe_bitsextant)); ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if (ctrl & INFINIPATH_C_FREEZEMODE) { + if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { /* * parity errors in send memory are recoverable, * just cancel the send (if indicated in * sendbuffererror), @@ -476,50 +494,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. 
*/ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } else { - ipath_dbg("Clearing freezemode on ignored hardware " - "error\n"); + if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; + if (hwerrs & RXE_EAGER_PARITY) + ipath_dev_err(dd, "RXE parity, Eager TID error is not " + "recoverable\n"); + if (!hwerrs) { + ipath_dbg("Clearing freezemode on ignored or " + "recovered hardware error\n"); ctrl &= ~INFINIPATH_C_FREEZEMODE; ipath_write_kreg(dd, dd->ipath_kregs->kr_control, ctrl); @@ -587,7 +569,39 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (hwerrs) { + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + * force link down, so switch knows, and + * LEDs are turned off + */ + if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_ht_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); + ipath_dev_err(dd, "Fatal Hardware Error (freeze " + "mode), no longer usable, SN %.16s\n", + dd->ipath_serial); + isfatal = 1; + } + *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; + /* mark as having had error */ + *dd->ipath_statusp |= IPATH_STATUS_HWERROR; + /* + * mark as not usable, at a minimum until driver + * is reloaded, probably until reboot, since no + * other reset is possible. 
+ */ + dd->ipath_flags &= ~IPATH_INITTED; + } + else + *msg = 0; /* recovered from all of them */ + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) /* * for status file; if no trailing brace is copied, @@ -658,7 +672,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, if (n) snprintf(name, namelen, "%s", n); - if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { + if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || + dd->ipath_minrev > 3)) { /* * This version of the driver only supports Rev 3.2 and 3.3 */ @@ -1163,6 +1178,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) + ipath_dbg("MemBIST corrected\n"); ipath_check_htlink(dd); @@ -1366,6 +1383,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, u32 type, unsigned long pa) { + if (!dd->ipath_kregbase) + return; + if (pa != dd->ipath_tidinvalid) { if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { dev_info(&dd->pcidev->dev, @@ -1382,10 +1402,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, pa |= lenvalid | INFINIPATH_RT_VALID; } } - if (dd->ipath_kregbase) - writeq(pa, tidptr); + writeq(pa, tidptr); } + /** * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager * @dd: the infinipath device @@ -1515,7 +1535,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) INFINIPATH_S_ABORT); ipath_get_eeprom_info(dd); - if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && + if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { /* * Later production QHT7040 has same changes as QHT7140, so @@ -1528,6 +1548,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) return 0; } + +static int ipath_ht_txe_recover(struct ipath_devdata *dd) +{ + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + + /** * ipath_init_ht_get_base_info - set chip-specific flags for user code * @dd: the infinipath device diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 05918e1e7c3..1b9c3085775 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -43,6 +43,8 @@ #include "ipath_kernel.h" #include "ipath_registers.h" +static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64); + /* * This file contains all the chip-specific register information and * access functions for the QLogic InfiniPath PCI-Express chip. 
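Both the HT and PCIe variants of the TXE recovery helper introduced in this patch follow the same capped-retry shape: bump a counter, keep recovering while under IPATH_MAX_PARITY_ATTEMPTS, and print the give-up message exactly once when the cap is first hit. A self-contained sketch of that shape (illustrative names, not the driver's):

	#include <stdio.h>

	#define MAX_PARITY_ATTEMPTS 10000	/* mirrors IPATH_MAX_PARITY_ATTEMPTS */

	static unsigned long txeparity;	/* stands in for ipath_stats.sps_txeparity */

	/* Return 1 while the error can still be treated as recovered, 0 once
	 * we give up; the "== MAX" test makes the give-up message print
	 * exactly once even though the handler keeps being called. */
	static int txe_recover(void)
	{
		unsigned long cnt = ++txeparity;

		if (cnt >= MAX_PARITY_ATTEMPTS) {
			if (cnt == MAX_PARITY_ATTEMPTS)
				fprintf(stderr, "Too many attempts to recover "
					"from TXE parity, giving up\n");
			return 0;
		}
		printf("Recovering from TXE PIO parity error (attempt %lu)\n", cnt);
		return 1;
	}

	int main(void)
	{
		txe_recover();				/* recovers */
		txeparity = MAX_PARITY_ATTEMPTS - 1;	/* jump to the cap for demo */
		txe_recover();				/* prints give-up once */
		txe_recover();				/* silent thereafter */
		return 0;
	}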
@@ -207,8 +209,8 @@ static const struct ipath_kregs ipath_pe_kregs = { .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port() + * These should not be used directly via ipath_write_kreg64(), + * use them with ipath_write_kreg64_port(), */ .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), @@ -321,6 +323,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), }; +#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ + INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ + << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) + +static int ipath_pe_txe_recover(struct ipath_devdata *); + /** * ipath_pe_handle_hwerrors - display hardware errors. * @dd: the infinipath device @@ -394,32 +402,21 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, * occur if a processor speculative read is done to the PIO * buffer while we are sending a packet, for example. */ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_stats.sps_txeparity++; - ipath_dbg("Recovering from TXE parity error (%llu), " - "hwerrstatus=%llx\n", - (unsigned long long) ipath_stats.sps_txeparity, - (unsigned long long) hwerrs); - ipath_disarm_senderrbufs(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - if (!hwerrs) { /* else leave in freeze mode */ - ipath_write_kreg(dd, - dd->ipath_kregs->kr_control, - dd->ipath_control); - return; - } - } + if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) + hwerrs &= ~TXE_PIO_PARITY; if (hwerrs) { /* * if any set that we aren't ignoring only make the * complaint once, in case it's stuck or recurring, * and we get here multiple times + * Force link down, so switch knows, and + * LEDs are turned off */ if (dd->ipath_flags & IPATH_INITTED) { + ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); + ipath_setup_pe_setextled(dd, + INFINIPATH_IBCS_L_STATE_DOWN, + INFINIPATH_IBCS_LT_STATE_DISABLED); ipath_dev_err(dd, "Fatal Hardware Error (freeze " "mode), no longer usable, SN %.16s\n", dd->ipath_serial); @@ -493,7 +490,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, dd->ipath_hwerrmask); } - ipath_dev_err(dd, "%s hardware error\n", msg); + if (*msg) + ipath_dev_err(dd, "%s hardware error\n", msg); if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) { /* * for /sys status file ; if no trailing } is copied, we'll @@ -581,6 +579,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) ipath_dev_err(dd, "MemBIST did not complete!\n"); + if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND) + ipath_dbg("MemBIST corrected\n"); val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ @@ -1330,6 +1330,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) dd->ipath_irq = 0; } +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip bug can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. 
+ * Because we can get lots of false errors, we have no upper limit + * on recovery attempts on those platforms. + */ +static int ipath_pe_txe_recover(struct ipath_devdata *dd) +{ + if (ipath_unordered_wc()) + ipath_dbg("Recovering from TXE PIO parity error\n"); + else { + int cnt = ++ipath_stats.sps_txeparity; + if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { + if (cnt == IPATH_MAX_PARITY_ATTEMPTS) + ipath_dev_err(dd, + "Too many attempts to recover from " + "TXE parity, giving up\n"); + return 0; + } + dev_info(&dd->pcidev->dev, + "Recovering from TXE PIO parity error\n"); + } + ipath_disarm_senderrbufs(dd, 1); + return 1; +} + /** * ipath_init_iba6120_funcs - set up the chip-specific function pointers * @dd: the infinipath device diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index d4f6b5239ef..7045ba68949 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -216,6 +216,20 @@ static int bringup_link(struct ipath_devdata *dd) return ret; } +static struct ipath_portdata *create_portdata0(struct ipath_devdata *dd) +{ + struct ipath_portdata *pd = NULL; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (pd) { + pd->port_dd = dd; + pd->port_cnt = 1; + /* The port 0 pkey table is used by the layer interface. */ + pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + } + return pd; +} + static int init_chip_first(struct ipath_devdata *dd, struct ipath_portdata **pdp) { @@ -271,20 +285,16 @@ static int init_chip_first(struct ipath_devdata *dd, goto done; } - dd->ipath_pd[0] = kzalloc(sizeof(*pd), GFP_KERNEL); + pd = create_portdata0(dd); - if (!dd->ipath_pd[0]) { + if (!pd) { ipath_dev_err(dd, "Unable to allocate portdata for port " "0, failing\n"); ret = -ENOMEM; goto done; } - pd = dd->ipath_pd[0]; - pd->port_dd = dd; - pd->port_port = 0; - pd->port_cnt = 1; - /* The port 0 pkey table is used by the layer interface. */ - pd->port_pkeys[0] = IPATH_DEFAULT_P_KEY; + dd->ipath_pd[0] = pd; + dd->ipath_rcvtidcnt = ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvtidcnt); dd->ipath_rcvtidbase = @@ -590,6 +600,10 @@ static int init_housekeeping(struct ipath_devdata *dd, goto done; } + + /* clear diagctrl register, in case diags were running and crashed */ + ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); + /* clear the initial reset flag, in case first driver load */ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, INFINIPATH_E_RESET); @@ -668,6 +682,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) { int ret = 0, i; u32 val32, kpiobufs; + u32 piobufs, uports; u64 val; struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ gfp_t gfp_flags = GFP_USER | __GFP_COMP; @@ -702,16 +717,17 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * the in memory DMA'ed copies of the registers. This has to * be done early, before we calculate lastport, etc. */ - val = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; + piobufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; /* * calc number of pioavail registers, and save it; we have 2 * bits per buffer. */ - dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) + dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / (sizeof(u64) * BITS_PER_BYTE / 2); + uports = dd->ipath_cfgports ? 
dd->ipath_cfgports - 1 : 0; if (ipath_kpiobufs == 0) { /* not set by user (this is default) */ - if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) + if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32) kpiobufs = 32; else kpiobufs = 16; @@ -719,31 +735,25 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) else kpiobufs = ipath_kpiobufs; - if (kpiobufs > - (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) { - i = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - - (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT); + if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { + i = (int) piobufs - + (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); if (i < 0) i = 0; - dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs for " - "kernel leaves too few for %d user ports " + dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " + "%d for kernel leaves too few for %d user ports " "(%d each); using %u\n", kpiobufs, - dd->ipath_cfgports - 1, - IPATH_MIN_USER_PORT_BUFCNT, i); + piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); /* * shouldn't change ipath_kpiobufs, because could be * different for different devices... */ kpiobufs = i; } - dd->ipath_lastport_piobuf = - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - kpiobufs; - dd->ipath_pbufsport = dd->ipath_cfgports > 1 - ? dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) - : 0; - val32 = dd->ipath_lastport_piobuf - - (dd->ipath_pbufsport * (dd->ipath_cfgports - 1)); + dd->ipath_lastport_piobuf = piobufs - kpiobufs; + dd->ipath_pbufsport = + uports ? dd->ipath_lastport_piobuf / uports : 0; + val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); if (val32 > 0) { ipath_dbg("allocating %u pbufs/port leaves %u unused, " "add to kernel\n", dd->ipath_pbufsport, val32); @@ -754,8 +764,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " "each for %u user ports\n", kpiobufs, - dd->ipath_piobcnt2k + dd->ipath_piobcnt4k, - dd->ipath_pbufsport, dd->ipath_cfgports - 1); + piobufs, dd->ipath_pbufsport, uports); dd->ipath_f_early_init(dd); @@ -839,11 +848,24 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) * Set up the port 0 (kernel) rcvhdr q and egr TIDs. If doing * re-init, the simplest way to handle this is to free * existing, and re-allocate. + * Need to re-create rest of port 0 portdata as well. */ if (reinit) { - struct ipath_portdata *pd = dd->ipath_pd[0]; - dd->ipath_pd[0] = NULL; - ipath_free_pddata(dd, pd); + /* Alloc and init new ipath_portdata for port0, + * Then free old pd. Could lead to fragmentation, but also + * makes later support for hot-swap easier. + */ + struct ipath_portdata *npd; + npd = create_portdata0(dd); + if (npd) { + ipath_free_pddata(dd, pd); + dd->ipath_pd[0] = pd = npd; + } else { + ipath_dev_err(dd, "Unable to allocate portdata for" + " port 0, failing\n"); + ret = -ENOMEM; + goto done; + } } dd->ipath_f_tidtemplate(dd); ret = ipath_create_rcvhdrq(dd, pd); diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 72b9e279d19..45d033169c6 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -38,10 +38,39 @@ #include "ipath_common.h" /* + * clear (write) a pio buffer, to clear a parity error. This routine + * should only be called when in freeze mode, and the buffer should be + * canceled afterwards. 
+ */ +static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) +{ + u32 __iomem *pbuf; + u32 dwcnt; /* dword count to write */ + if (pnum < dd->ipath_piobcnt2k) { + pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * + dd->ipath_palign); + dwcnt = dd->ipath_piosize2k >> 2; + } + else { + pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + + (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); + dwcnt = dd->ipath_piosize4k >> 2; + } + dev_info(&dd->pcidev->dev, + "Rewrite PIO buffer %u, to recover from parity error\n", + pnum); + *pbuf = dwcnt+1; /* no flush required, since already in freeze */ + while(--dwcnt) + *pbuf++ = 0; +} + +/* * Called when we might have an error that is specific to a particular * PIO buffer, and may need to cancel that buffer, so it can be re-used. + * If rewrite is true, and bits are set in the sendbufferror registers, + * we'll write to the buffer, for error recovery on parity errors. */ -void ipath_disarm_senderrbufs(struct ipath_devdata *dd) +void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) { u32 piobcnt; unsigned long sbuf[4]; @@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) } for (i = 0; i < piobcnt; i++) - if (test_bit(i, sbuf)) + if (test_bit(i, sbuf)) { + if (rewrite) + ipath_clrpiobuf(dd, i); ipath_disarm_piobufs(dd, i, 1); + } dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ } } @@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) { u64 ignore_this_time = 0; - ipath_disarm_senderrbufs(dd); + ipath_disarm_senderrbufs(dd, 0); if ((errs & E_SUM_LINK_PKTERRS) && !(dd->ipath_flags & IPATH_LINKACTIVE)) { /* @@ -403,10 +435,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * happens so often we never want to count it. */ if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { - ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & - ~INFINIPATH_E_IBSTATUSCHANGED); + int iserr; + iserr = ipath_decode_err(msg, sizeof msg, + dd->ipath_lasterror & + ~INFINIPATH_E_IBSTATUSCHANGED); if (dd->ipath_lasterror & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) ipath_dev_err(dd, "Suppressed %u messages for " "fast-repeating errors (%s) (%llx)\n", supp_msgs, msg, @@ -420,8 +455,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd, * them. So only complain about these at debug * level. 
*/ - ipath_dbg("Suppressed %u messages for %s\n", - supp_msgs, msg); + if (iserr) + ipath_dbg("Suppressed %u messages for %s\n", + supp_msgs, msg); + else + ipath_cdbg(ERRPKT, + "Suppressed %u messages for %s\n", + supp_msgs, msg); } } } @@ -462,7 +502,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) { char msg[512]; u64 ignore_this_time = 0; - int i; + int i, iserr = 0; int chkerrpkts = 0, noprint = 0; unsigned supp_msgs; @@ -502,6 +542,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) } if (supp_msgs == 250000) { + int s_iserr; /* * It's not entirely reasonable assuming that the errors set * in the last clear period are all responsible for the @@ -511,17 +552,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) dd->ipath_maskederrs |= dd->ipath_lasterror | errs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, ~dd->ipath_maskederrs); - ipath_decode_err(msg, sizeof msg, + s_iserr = ipath_decode_err(msg, sizeof msg, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) - ipath_dev_err(dd, "Disabling error(s) %llx because " - "occurring too frequently (%s)\n", - (unsigned long long) - (dd->ipath_maskederrs & - ~dd->ipath_ignorederrs), msg); + ~(INFINIPATH_E_RRCVEGRFULL | + INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) + ipath_dev_err(dd, "Temporarily disabling " + "error(s) %llx reporting; too frequent (%s)\n", + (unsigned long long) (dd->ipath_maskederrs & + ~dd->ipath_ignorederrs), msg); else { /* * rcvegrfull and rcvhdrqfull are "normal", @@ -530,8 +571,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) * processing them. So only complain about * these at debug level. 
*/ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", msg); + if (s_iserr) + ipath_dbg("Temporarily disabling reporting " + "too frequent queue full errors (%s)\n", + msg); + else + ipath_cdbg(ERRPKT, + "Temporarily disabling reporting too" + " frequent packet errors (%s)\n", + msg); } /* @@ -589,6 +637,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) ipath_stats.sps_crcerrs++; chkerrpkts = 1; } + iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS); + /* * We don't want to print these two as they happen, or we can make @@ -677,8 +727,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; } - if (!noprint && *msg) - ipath_dev_err(dd, "%s error\n", msg); + if (!noprint && *msg) { + if (iserr) + ipath_dev_err(dd, "%s error\n", msg); + else + dev_info(&dd->pcidev->dev, "%s packet problems\n", + msg); + } if (dd->ipath_state_wanted & dd->ipath_flags) { ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " "waking\n", dd->ipath_state_wanted, @@ -819,11 +874,10 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat) struct ipath_portdata *pd = dd->ipath_pd[i]; if (portr & (1 << i) && pd && pd->port_cnt && test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) { - int rcbit; clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT; - clear_bit(1UL << rcbit, &dd->ipath_rcvctrl); + clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT, + &dd->ipath_rcvctrl); wake_up_interruptible(&pd->port_wait); rcvdint = 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 6d8d05fb599..e900c2593f4 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd); void ipath_disable_wc(struct ipath_devdata *dd); int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); void ipath_shutdown_device(struct ipath_devdata *); -void ipath_disarm_senderrbufs(struct ipath_devdata *); struct file_operations; int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, @@ -611,7 +610,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t); extern int ipath_diag_inuse; irqreturn_t ipath_intr(int irq, void *devid); -void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); +int ipath_decode_err(char *buf, size_t blen, ipath_err_t err); #if __IPATH_INFO || __IPATH_DBG extern const char *ipath_ibcstatus_str[]; #endif @@ -701,6 +700,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); #define IPATH_PORT_WAITING_RCV 2 /* waiting for a PIO buffer to be available */ #define IPATH_PORT_WAITING_PIO 3 + /* master has not finished initializing */ +#define IPATH_PORT_MASTER_UNINIT 4 /* free up any allocated data at closes */ void ipath_free_data(struct ipath_portdata *dd); @@ -711,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); +void ipath_disarm_senderrbufs(struct ipath_devdata *, int); /* * number of words used for protocol header if not set by ipath_userinit(); @@ -754,8 +756,6 @@ int ipath_eeprom_write(struct ipath_devdata *, u8, const void *, int); /* these are used for the registers that vary with port */ void ipath_write_kreg_port(const struct ipath_devdata *, ipath_kreg, unsigned, 
u64); -u64 ipath_read_kreg64_port(const struct ipath_devdata *, ipath_kreg, - unsigned); /* * We could have a single register get/put routine, that takes a group type, @@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); extern unsigned ipath_debug; /* debugging bit mask */ +#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */ + const char *ipath_get_unit_name(int unit); extern struct mutex ipath_mutex; diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 851763d7d2d..dd487c100f5 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -61,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr) r = (r + 1) & (rkt->max - 1); if (r == n) { spin_unlock_irqrestore(&rkt->lock, flags); - ipath_dbg(KERN_INFO "LKEY table full\n"); + ipath_dbg("LKEY table full\n"); ret = 0; goto bail; } @@ -133,6 +133,12 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, * being reversible by calling bus_to_virt(). */ if (sge->lkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } isge->mr = NULL; isge->vaddr = (void *) sge->addr; isge->length = sge->length; @@ -206,6 +212,12 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { + struct ipath_pd *pd = to_ipd(qp->ibqp.pd); + + if (pd->user) { + ret = 0; + goto bail; + } sge->mr = NULL; sge->vaddr = (void *) vaddr; sge->length = len; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 8cc8598d6c6..31e70732e36 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -210,9 +210,15 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, m = 0; n = 0; list_for_each_entry(chunk, &region->chunk_list, list) { - for (i = 0; i < chunk->nmap; i++) { - mr->mr.map[m]->segs[n].vaddr = - page_address(chunk->page_list[i].page); + for (i = 0; i < chunk->nents; i++) { + void *vaddr; + + vaddr = page_address(chunk->page_list[i].page); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = region->page_size; n++; if (n == IPATH_SEGSZ) { diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 64f07b19349..16db9ac0b40 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -81,11 +81,51 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static u32 alloc_qpn(struct ipath_qp_table *qpt) + +static void get_map_page(struct ipath_qp_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long flags; + + /* + * Free the page if someone raced with us installing it.
+ */ + + spin_lock_irqsave(&qpt->lock, flags); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock_irqrestore(&qpt->lock, flags); +} + + +static int alloc_qpn(struct ipath_qp_table *qpt, enum ib_qp_type type) { u32 i, offset, max_scan, qpn; struct qpn_map *map; - u32 ret; + u32 ret = -1; + + if (type == IB_QPT_SMI) + ret = 0; + else if (type == IB_QPT_GSI) + ret = 1; + + if (ret != -1) { + map = &qpt->map[0]; + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) { + ret = -ENOMEM; + goto bail; + } + } + if (!test_and_set_bit(ret, map->page)) + atomic_dec(&map->n_free); + else + ret = -EBUSY; + goto bail; + } qpn = qpt->last + 1; if (qpn >= QPN_MAX) @@ -95,19 +135,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - unsigned long page = get_zeroed_page(GFP_KERNEL); - unsigned long flags; - - /* - * Free the page if someone raced with us - * installing it: - */ - spin_lock_irqsave(&qpt->lock, flags); - if (map->page) - free_page(page); - else - map->page = (void *)page; - spin_unlock_irqrestore(&qpt->lock, flags); + get_map_page(qpt, map); if (unlikely(!map->page)) break; } @@ -151,7 +179,7 @@ static u32 alloc_qpn(struct ipath_qp_table *qpt) qpn = mk_qpn(qpt, map, offset); } - ret = 0; + ret = -ENOMEM; bail: return ret; @@ -180,29 +208,19 @@ static int ipath_alloc_qpn(struct ipath_qp_table *qpt, struct ipath_qp *qp, enum ib_qp_type type) { unsigned long flags; - u32 qpn; int ret; - if (type == IB_QPT_SMI) - qpn = 0; - else if (type == IB_QPT_GSI) - qpn = 1; - else { - /* Allocate the next available QPN */ - qpn = alloc_qpn(qpt); - if (qpn == 0) { - ret = -ENOMEM; - goto bail; - } - } - qp->ibqp.qp_num = qpn; + ret = alloc_qpn(qpt, type); + if (ret < 0) + goto bail; + qp->ibqp.qp_num = ret; /* Add the QP to the hash table. */ spin_lock_irqsave(&qpt->lock, flags); - qpn %= qpt->max; - qp->next = qpt->table[qpn]; - qpt->table[qpn] = qp; + ret %= qpt->max; + qp->next = qpt->table[ret]; + qpt->table[ret] = qp; atomic_inc(&qp->refcount); spin_unlock_irqrestore(&qpt->lock, flags); @@ -245,9 +263,7 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp) if (!fnd) return; - /* If QPN is not reserved, mark QPN free in the bitmap. 
*/ - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); wait_event(qp->wait, !atomic_read(&qp->refcount)); } @@ -270,11 +286,10 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt) while (qp) { nqp = qp->next; - if (qp->ibqp.qp_num > 1) - free_qpn(qpt, qp->ibqp.qp_num); + free_qpn(qpt, qp->ibqp.qp_num); if (!atomic_dec_and_test(&qp->refcount) || !ipath_destroy_qp(&qp->ibqp)) - ipath_dbg(KERN_INFO "QP memory leak!\n"); + ipath_dbg("QP memory leak!\n"); qp = nqp; } } @@ -320,7 +335,8 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; - clear_bit(IPATH_S_BUSY, &qp->s_flags); + qp->s_busy = 0; + qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_psn = 0; qp->r_psn = 0; @@ -333,7 +349,6 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->r_state = IB_OPCODE_UC_SEND_LAST; } qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; - qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; qp->r_nak_state = 0; qp->r_wrid_valid = 0; qp->s_rnr_timeout = 0; @@ -344,6 +359,10 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_ssn = 1; qp->s_lsn = 0; qp->s_wait_credit = 0; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -357,7 +376,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * QP s_lock should be held and interrupts disabled. + * The QP s_lock should be held and interrupts disabled. */ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) @@ -365,7 +384,7 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ib_wc wc; - ipath_dbg(KERN_INFO "QP%d/%d in error state\n", + ipath_dbg("QP%d/%d in error state\n", qp->ibqp.qp_num, qp->remote_qpn); spin_lock(&dev->pending_lock); @@ -389,6 +408,8 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) wc.port_num = 0; if (qp->r_wrid_valid) { qp->r_wrid_valid = 0; + wc.wr_id = qp->r_wr_id; + wc.opcode = IB_WC_RECV; wc.status = err; ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); } @@ -503,13 +524,17 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->path_mig_state != IB_MIG_REARM) goto inval; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > IPATH_MAX_RDMA_ATOMIC) + goto inval; + switch (new_state) { case IB_QPS_RESET: ipath_reset_qp(qp); break; case IB_QPS_ERR: - ipath_error_qp(qp, IB_WC_GENERAL_ERR); + ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); break; default: @@ -559,6 +584,12 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_QKEY) qp->qkey = attr->qkey; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + qp->state = new_state; spin_unlock_irqrestore(&qp->s_lock, flags); @@ -598,8 +629,8 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->alt_pkey_index = 0; attr->en_sqd_async_notify = 0; attr->sq_draining = 0; - attr->max_rd_atomic = 1; - attr->max_dest_rd_atomic = 1; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; attr->min_rnr_timer = qp->r_min_rnr_timer; attr->port_num = 1; attr->timeout = qp->timeout; @@ 
-614,7 +645,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->recv_cq = qp->ibqp.recv_cq; init_attr->srq = qp->ibqp.srq; init_attr->cap = attr->cap; - if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) + if (qp->s_flags & IPATH_S_SIGNAL_REQ_WR) init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; else init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; @@ -786,7 +817,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->s_size = init_attr->cap.max_send_wr + 1; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) - qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR; + qp->s_flags = IPATH_S_SIGNAL_REQ_WR; else qp->s_flags = 0; dev = to_idev(ibpd->device); @@ -958,7 +989,7 @@ bail: * @wc: the WC responsible for putting the QP in this state * * Flushes the send work queue. - * The QP s_lock should be held. + * The QP s_lock should be held and interrupts disabled. */ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) @@ -966,7 +997,7 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) struct ipath_ibdev *dev = to_idev(qp->ibqp.device); struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); - ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n", + ipath_dbg("Send queue error on QP%d/%d: err: %d\n", qp->ibqp.qp_num, qp->remote_qpn, wc->status); spin_lock(&dev->pending_lock); @@ -984,12 +1015,12 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc) wc->status = IB_WC_WR_FLUSH_ERR; while (qp->s_last != qp->s_head) { + wqe = get_swqe_ptr(qp, qp->s_last); wc->wr_id = wqe->wr.wr_id; wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1); if (++qp->s_last >= qp->s_size) qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); } qp->s_cur = qp->s_tail = qp->s_head; qp->state = IB_QPS_SQE; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 5ff20cb0449..b4b88d0b53f 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -37,6 +37,19 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x +static u32 restart_sge(struct ipath_sge_state *ss, struct ipath_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ipath_skip_sge(ss, len); + return wqe->length - len; +} + /** * ipath_init_restart- initialize the qp->s_sge after a restart * @qp: the QP who's SGE we're restarting @@ -47,15 +60,9 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) { struct ipath_ibdev *dev; - u32 len; - len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * - ib_mtu_enum_to_int(qp->path_mtu); - qp->s_sge.sge = wqe->sg_list[0]; - qp->s_sge.sg_list = wqe->sg_list + 1; - qp->s_sge.num_sge = wqe->wr.num_sge; - ipath_skip_sge(&qp->s_sge, len); - qp->s_len = wqe->length - len; + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, + ib_mtu_enum_to_int(qp->path_mtu)); dev = to_idev(qp->ibqp.device); spin_lock(&dev->pending_lock); if (list_empty(&qp->timerwait)) @@ -70,107 +77,123 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe) * @ohdr: a pointer to the IB header being constructed * @pmtu: the path MTU * - * Return bth0 if constructed; otherwise, return 0. + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. * Note the QP s_lock must be held. 
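/*
 * The arithmetic factored into restart_sge() above, in isolation: a
 * send restarting at PSN `psn` must skip one full-MTU payload per PSN
 * consumed since the request's first PSN, with the subtraction done in
 * the 24-bit PSN space.  Userspace sketch, illustrative names:
 */
#include <stdint.h>

#define PSN_MASK 0xFFFFFF		/* PSNs are 24 bits */

static uint32_t resend_skip_bytes(uint32_t first_psn, uint32_t psn,
				  uint32_t pmtu)
{
	return ((psn - first_psn) & PSN_MASK) * pmtu;
}
/*
 * e.g. first_psn = 0xFFFFFE, psn = 0x000001, pmtu = 2048:
 * (psn - first_psn) & PSN_MASK == 3, so 6144 bytes are skipped.
 */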
*/ -u32 ipath_make_rc_ack(struct ipath_qp *qp, - struct ipath_other_headers *ohdr, - u32 pmtu) +static int ipath_make_rc_ack(struct ipath_qp *qp, + struct ipath_other_headers *ohdr, + u32 pmtu, u32 *bth0p, u32 *bth2p) { + struct ipath_ack_entry *e; u32 hwords; u32 len; u32 bth0; + u32 bth2; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; - /* - * Send a response. Note that we are in the responder's - * side of the QP context. - */ switch (qp->s_ack_state) { - case OP(RDMA_READ_REQUEST): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; - if (len > pmtu) { - len = pmtu; - qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); - } else - qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); - qp->s_rdma_len -= len; + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + case OP(ATOMIC_ACKNOWLEDGE): + qp->s_ack_state = OP(ACKNOWLEDGE); + /* FALLTHROUGH */ + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & IPATH_S_ACK_PENDING) + goto normal; + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* Copy SGE state in case we need to resend */ + qp->s_ack_rdma_sge = e->rdma_sge; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + len = e->rdma_sge.sge.sge_length; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else { + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + if (++qp->s_tail_ack_queue > + IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + } + ohdr->u.aeth = ipath_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = ipath_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn; + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + } bth0 = qp->s_ack_state << 24; - ohdr->u.aeth = ipath_compute_aeth(qp); - hwords++; break; case OP(RDMA_READ_RESPONSE_FIRST): qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); /* FALLTHROUGH */ case OP(RDMA_READ_RESPONSE_MIDDLE): - qp->s_cur_sge = &qp->s_rdma_sge; - len = qp->s_rdma_len; + len = qp->s_ack_rdma_sge.sge.sge_length; if (len > pmtu) len = pmtu; else { ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; } - qp->s_rdma_len -= len; bth0 = qp->s_ack_state << 24; - break; - - case OP(RDMA_READ_RESPONSE_LAST): - case OP(RDMA_READ_RESPONSE_ONLY): - /* - * We have to prevent new requests from changing - * the r_sge state while a ipath_verbs_send() - * is in progress. - */ - qp->s_ack_state = OP(ACKNOWLEDGE); - bth0 = 0; - goto bail; - - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - qp->s_cur_sge = NULL; - len = 0; - /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. 
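/*
 * The responder state above is a ring of IPATH_MAX_RDMA_ATOMIC + 1
 * ack-queue slots indexed by r_head_ack_queue/s_tail_ack_queue, which
 * is why every advance increments and wraps past the last slot.  A
 * self-contained model (the constant mirrors the patch, names are
 * ours):
 */
#define MAX_RDMA_ATOMIC 4		/* mirrors IPATH_MAX_RDMA_ATOMIC */

static unsigned ack_queue_next(unsigned idx)
{
	if (++idx > MAX_RDMA_ATOMIC)	/* ring holds MAX_RDMA_ATOMIC + 1 */
		idx = 0;
	return idx;
}
/*
 * head == tail means "nothing queued"; the extra slot keeps a full
 * ring (ack_queue_next(head) == tail) distinguishable from an empty
 * one.
 */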
- */ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.aeth = ipath_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at) / 4; + bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK; break; default: - /* Send a regular ACK. */ - qp->s_cur_sge = NULL; - len = 0; + normal: /* - * Set the s_ack_state so the receive interrupt handler - * won't try to send an ACK (out of order) until this one - * is actually sent. + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). */ - qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); - bth0 = OP(ACKNOWLEDGE) << 24; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + qp->s_flags &= ~IPATH_S_ACK_PENDING; + qp->s_cur_sge = NULL; if (qp->s_nak_state) - ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | - (qp->s_nak_state << - IPATH_AETH_CREDIT_SHIFT)); + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | + (qp->s_nak_state << + IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & IPATH_PSN_MASK; } qp->s_hdrwords = hwords; qp->s_cur_size = len; + *bth0p = bth0; + *bth2p = bth2; + return 1; bail: - return bth0; + return 0; } /** @@ -197,9 +220,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, u32 bth2; char newreq; + /* Sending responses has higher priority over sending requests. */ + if ((qp->r_head_ack_queue != qp->s_tail_ack_queue || + (qp->s_flags & IPATH_S_ACK_PENDING) || + qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) && + ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p)) + goto done; + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || qp->s_rnr_timeout) - goto done; + goto bail; /* Limit the number of packets sent without an ACK. */ if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { @@ -210,7 +240,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); - goto done; + goto bail; } /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -232,7 +262,16 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_tail) { /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) - goto done; + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= IPATH_S_FENCE_PENDING; + goto bail; + } wqe->psn = qp->s_next_psn; newreq = 1; } @@ -250,7 +289,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. */ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -281,13 +320,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* If no credit, return. 
*/ if (qp->s_lsn != (u32) -1 && ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) - goto done; + goto bail; ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->wr.wr.rdma.remote_addr); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); - hwords += sizeof(struct ib_reth) / 4; + hwords += sizeof(struct ib_reth) / sizeof(u32); wqe->lpsn = wqe->psn; if (len > pmtu) { wqe->lpsn += (len - 1) / pmtu; @@ -312,14 +351,17 @@ int ipath_make_rc_req(struct ipath_qp *qp, break; case IB_WR_RDMA_READ: - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->wr.wr.rdma.remote_addr); - ohdr->u.rc.reth.rkey = - cpu_to_be32(wqe->wr.wr.rdma.rkey); - ohdr->u.rc.reth.length = cpu_to_be32(len); - qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; /* @@ -330,6 +372,13 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_next_psn += (len - 1) / pmtu; wqe->lpsn = qp->s_next_psn++; } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); ss = NULL; len = 0; if (++qp->s_cur == qp->s_size) @@ -338,32 +387,48 @@ int ipath_make_rc_req(struct ipath_qp *qp, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) - qp->s_state = OP(COMPARE_SWAP); - else - qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.vaddr = cpu_to_be64( - wqe->wr.wr.atomic.remote_addr); - ohdr->u.atomic_eth.rkey = cpu_to_be32( - wqe->wr.wr.atomic.rkey); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->wr.wr.atomic.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->wr.wr.atomic.compare_add); - hwords += sizeof(struct ib_atomic_eth) / 4; + /* + * Don't allow more operations to be started + * than the QP limits allow. 
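/*
 * The initiator-side gating added above for RDMA reads and atomics,
 * reduced to its core: count requests against the negotiated limit
 * and record that the send engine needs a restart once a response
 * retires one.  Field and flag names here are illustrative.
 */
struct rd_atomic_gate {
	unsigned num_outstanding;	/* like s_num_rd_atomic */
	unsigned max_outstanding;	/* like s_max_rd_atomic */
	unsigned flags;
#define GATE_RDMAR_PENDING 0x1		/* restart blocked requester */
};

static int may_start_rd_atomic(struct rd_atomic_gate *g)
{
	if (g->num_outstanding >= g->max_outstanding) {
		g->flags |= GATE_RDMAR_PENDING;
		return 0;		/* caller bails, as in the patch */
	}
	g->num_outstanding++;
	return 1;
}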
+ */ if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= IPATH_S_RDMAR_PENDING; + goto bail; + } + qp->s_num_rd_atomic++; if (qp->s_lsn != (u32) -1) qp->s_lsn++; wqe->lpsn = wqe->psn; } - if (++qp->s_cur == qp->s_size) - qp->s_cur = 0; + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); ss = NULL; len = 0; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; break; default: - goto done; + goto bail; } qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; @@ -379,7 +444,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_psn = wqe->lpsn + 1; else { qp->s_psn++; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; } /* @@ -406,7 +471,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -442,7 +507,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; @@ -479,9 +544,9 @@ int ipath_make_rc_req(struct ipath_qp *qp, cpu_to_be32(wqe->wr.wr.rdma.rkey); ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); - hwords += sizeof(ohdr->u.rc.reth) / 4; + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if ((int)(qp->s_psn - qp->s_next_psn) > 0) + if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) qp->s_next_psn = qp->s_psn; ss = NULL; len = 0; @@ -489,20 +554,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, if (qp->s_cur == qp->s_size) qp->s_cur = 0; break; - - case OP(RDMA_READ_REQUEST): - case OP(COMPARE_SWAP): - case OP(FETCH_ADD): - /* - * We shouldn't start anything new until this request is - * finished. The ACK will handle rescheduling us. XXX The - * number of outstanding ones is negotiated at connection - * setup time (see pg. 258,289)? XXX Also, if we support - * multiple outstanding requests, we need to check the WQE - * IB_SEND_FENCE flag and not send a new request if a RDMA - * read or atomic is pending. - */ - goto done; } if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) bth2 |= 1 << 31; /* Request ACK. */ @@ -512,9 +563,10 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_cur_size = len; *bth0p = bth0 | (qp->s_state << 24); *bth2p = bth2; +done: return 1; -done: +bail: return 0; } @@ -524,7 +576,8 @@ done: * * This is called from ipath_rc_rcv() and only uses the receive * side QP state. - * Note that RDMA reads are handled in the send side QP state and tasklet. 
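/*
 * Why struct ib_atomic_eth now carries vaddr as two __be32 words (see
 * the header change further below): the field is only 32-bit aligned
 * on the wire, so the patch splits and reassembles 64-bit values by
 * hand, as in the atomic_eth encoding above.  Userspace sketch with
 * htonl/ntohl standing in for cpu_to_be32/be32_to_cpu:
 */
#include <stdint.h>
#include <arpa/inet.h>

static void put_u64_as_2x32(uint32_t out[2], uint64_t v)
{
	out[0] = htonl((uint32_t)(v >> 32));	/* high word first */
	out[1] = htonl((uint32_t)v);
}

static uint64_t get_u64_from_2x32(const uint32_t in[2])
{
	return ((uint64_t)ntohl(in[0]) << 32) | ntohl(in[1]);
}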
+ * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. */ static void send_rc_ack(struct ipath_qp *qp) { @@ -535,6 +588,10 @@ static void send_rc_ack(struct ipath_qp *qp) struct ipath_ib_header hdr; struct ipath_other_headers *ohdr; + /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ + if (qp->r_head_ack_queue != qp->s_tail_ack_queue) + goto queue_ack; + /* Construct the header. */ ohdr = &hdr.u.oth; lrh0 = IPATH_LRH_BTH; @@ -548,19 +605,14 @@ static void send_rc_ack(struct ipath_qp *qp) lrh0 = IPATH_LRH_GRH; } /* read pkey_index w/o lock (its atomic) */ - bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index); + bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) | + OP(ACKNOWLEDGE) << 24; if (qp->r_nak_state) ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | (qp->r_nak_state << IPATH_AETH_CREDIT_SHIFT)); else ohdr->u.aeth = ipath_compute_aeth(qp); - if (qp->r_ack_state >= OP(COMPARE_SWAP)) { - bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24; - ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data); - hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4; - } else - bth0 |= OP(ACKNOWLEDGE) << 24; lrh0 |= qp->remote_ah_attr.sl << 4; hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); @@ -574,31 +626,31 @@ static void send_rc_ack(struct ipath_qp *qp) * If we can send the ACK, clear the ACK state. */ if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) { - qp->r_ack_state = OP(ACKNOWLEDGE); dev->n_unicast_xmit++; - } else { - /* - * We are out of PIO buffers at the moment. - * Pass responsibility for sending the ACK to the - * send tasklet so that when a PIO buffer becomes - * available, the ACK is sent ahead of other outgoing - * packets. - */ - dev->n_rc_qacks++; - spin_lock_irq(&qp->s_lock); - /* Don't coalesce if a RDMA read or atomic is pending. */ - if (qp->s_ack_state == OP(ACKNOWLEDGE) || - qp->s_ack_state < OP(RDMA_READ_REQUEST)) { - qp->s_ack_state = qp->r_ack_state; - qp->s_nak_state = qp->r_nak_state; - qp->s_ack_psn = qp->r_ack_psn; - qp->r_ack_state = OP(ACKNOWLEDGE); - } - spin_unlock_irq(&qp->s_lock); - - /* Call ipath_do_rc_send() in another thread. */ - tasklet_hi_schedule(&qp->s_task); + goto done; } + + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + dev->n_rc_qacks++; + +queue_ack: + spin_lock_irq(&qp->s_lock); + qp->s_flags |= IPATH_S_ACK_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + spin_unlock_irq(&qp->s_lock); + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + +done: + return; } /** @@ -727,7 +779,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) if (wqe->wr.opcode == IB_WR_RDMA_READ) dev->n_rc_resends++; else - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); tasklet_hi_schedule(&qp->s_task); @@ -775,10 +827,6 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_del_init(&qp->timerwait); spin_unlock(&dev->pending_lock); - /* Nothing is pending to ACK/NAK. 
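/*
 * Several hunks above replace a plain signed subtraction with
 * ipath_cmp24() or with "& IPATH_PSN_MASK", because PSNs live on a
 * 2^24 circle.  A standard circular compare shifts the difference so
 * that bit 23 becomes the sign bit; userspace sketch:
 */
#include <stdint.h>

static int cmp24(uint32_t a, uint32_t b)
{
	/* < 0: a precedes b; 0: equal; > 0: a follows b (mod 2^24) */
	return (int32_t)((a - b) << 8);
}
/* e.g. cmp24(0x000002, 0xFFFFFE) > 0: PSN 2 is 4 ahead of 0xFFFFFE. */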
*/ - if (unlikely(qp->s_last == qp->s_tail)) - goto bail; - /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -806,7 +854,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ if ((wqe->wr.opcode == IB_WR_RDMA_READ && (opcode != OP(RDMA_READ_RESPONSE_LAST) || - ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || + ipath_cmp24(ack_psn, wqe->lpsn) != 0)) || ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && (opcode != OP(ATOMIC_ACKNOWLEDGE) || @@ -824,20 +872,33 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) */ goto bail; } - if (wqe->wr.opcode == IB_WR_RDMA_READ || - wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || - wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - tasklet_hi_schedule(&qp->s_task); + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if ((qp->s_flags & IPATH_S_FENCE_PENDING) && + !qp->s_num_rd_atomic) { + qp->s_flags &= ~IPATH_S_FENCE_PENDING; + tasklet_hi_schedule(&qp->s_task); + } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { + qp->s_flags &= ~IPATH_S_RDMAR_PENDING; + tasklet_hi_schedule(&qp->s_task); + } + } /* Post a send completion queue entry if requested. */ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; wc.vendor_err = 0; wc.byte_len = wqe->length; + wc.imm_data = 0; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; wc.pkey_index = 0; wc.slid = qp->remote_ah_attr.dlid; wc.sl = qp->remote_ah_attr.sl; @@ -854,15 +915,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) if (qp->s_last == qp->s_cur) { if (++qp->s_cur >= qp->s_size) qp->s_cur = 0; + qp->s_last = qp->s_cur; + if (qp->s_last == qp->s_tail) + break; wqe = get_swqe_ptr(qp, qp->s_cur); qp->s_state = OP(SEND_LAST); qp->s_psn = wqe->psn; + } else { + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + if (qp->s_last == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, qp->s_last); } - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - if (qp->s_last == qp->s_tail) - break; } switch (aeth >> 29) { @@ -874,6 +939,18 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) list_add_tail(&qp->timerwait, &dev->pending[dev->pending_index]); spin_unlock(&dev->pending_lock); + /* + * If we get a partial ACK for a resent operation, + * we can stop resending the earlier packets and + * continue with the next packet the receiver wants. 
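/*
 * How the do_rc_ack() switch above carves up an AETH word.  The layout
 * follows the IB spec as the driver uses it: a 3-bit class in the top
 * bits, a 5-bit credit count or NAK code below it, and the 24-bit MSN
 * in the rest.  Macro names are ours:
 */
#include <stdint.h>

#define AETH_CLASS(aeth) ((aeth) >> 29)		 /* 0=ACK 1=RNR-NAK 3=NAK */
#define AETH_CODE(aeth)	 (((aeth) >> 24) & 0x1F) /* credits or NAK reason */
#define AETH_MSN(aeth)	 ((aeth) & 0xFFFFFF)	 /* message sequence number */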
+ */ + if (ipath_cmp24(qp->s_psn, psn) <= 0) { + reset_psn(qp, psn + 1); + tasklet_hi_schedule(&qp->s_task); + } + } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; } ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; @@ -884,22 +961,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 1: /* RNR NAK */ dev->n_rnr_naks++; + if (qp->s_last == qp->s_tail) + goto bail; if (qp->s_rnr_retry == 0) { - if (qp->s_last == qp->s_tail) - goto bail; - wc.status = IB_WC_RNR_RETRY_EXC_ERR; goto class_b; } if (qp->s_rnr_retry_cnt < 7) qp->s_rnr_retry--; - if (qp->s_last == qp->s_tail) - goto bail; /* The last valid PSN is the previous PSN. */ update_last_psn(qp, psn - 1); - dev->n_rc_resends += (int)qp->s_psn - (int)psn; + if (wqe->wr.opcode == IB_WR_RDMA_READ) + dev->n_rc_resends++; + else + dev->n_rc_resends += + (qp->s_psn - psn) & IPATH_PSN_MASK; reset_psn(qp, psn); @@ -910,26 +988,20 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; case 3: /* NAK */ - /* The last valid PSN seen is the previous request's. */ - if (qp->s_last != qp->s_tail) - update_last_psn(qp, wqe->psn - 1); + if (qp->s_last == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ dev->n_seq_naks++; /* - * Back up to the responder's expected PSN. XXX + * Back up to the responder's expected PSN. * Note that we might get a NAK in the middle of an * RDMA READ response which terminates the RDMA * READ. */ - if (qp->s_last == qp->s_tail) - break; - - if (ipath_cmp24(psn, wqe->psn) < 0) - break; - - /* Retry the request. */ ipath_restart_rc(qp, psn, &wc); break; @@ -1003,6 +1075,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, u32 psn, u32 hdrsize, u32 pmtu, int header_in_data) { + struct ipath_swqe *wqe; unsigned long flags; struct ib_wc wc; int diff; @@ -1032,6 +1105,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, goto ack_done; } + if (unlikely(qp->s_last == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_last); + switch (opcode) { case OP(ACKNOWLEDGE): case OP(ATOMIC_ACKNOWLEDGE): @@ -1042,38 +1119,49 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) - *(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data; + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + u64 val; + + if (!header_in_data) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = be64_to_cpu(((__be64 *) data)[0]); + *(u64 *) wqe->sg_list[0].vaddr = val; + } if (!do_rc_ack(qp, aeth, psn, opcode) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; hdrsize += 4; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; /* - * do_rc_ack() has already checked the PSN so skip - * the sequence check. + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. 
*/ - goto rdma_read; + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; case OP(RDMA_READ_RESPONSE_MIDDLE): /* no AETH, no ACK */ if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - rdma_read: - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + read_middle: if (unlikely(tlen != (hdrsize + pmtu + 4))) - goto ack_done; - if (unlikely(pmtu >= qp->s_len)) - goto ack_done; + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + /* We got a response so update the timeout. */ - if (unlikely(qp->s_last == qp->s_tail || - get_swqe_ptr(qp, qp->s_last)->wr.opcode != - IB_WR_RDMA_READ)) - goto ack_done; spin_lock(&dev->pending_lock); if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait)) list_move_tail(&qp->timerwait, @@ -1082,67 +1170,97 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, /* * Update the RDMA receive state but do the copy w/o * holding the locks and blocking interrupts. - * XXX Yet another place that affects relaxed RDMA order - * since we don't want s_sge modified. */ - qp->s_len -= pmtu; + qp->s_rdma_read_len -= pmtu; update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); - ipath_copy_sge(&qp->s_sge, data, pmtu); + ipath_copy_sge(&qp->s_rdma_read_sge, data, pmtu); goto bail; - case OP(RDMA_READ_RESPONSE_LAST): - /* ACKs READ req. */ + case OP(RDMA_READ_RESPONSE_ONLY): if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { dev->n_rdma_seq++; - if (qp->s_last != qp->s_tail) - ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); goto ack_done; } - /* FALLTHROUGH */ - case OP(RDMA_READ_RESPONSE_ONLY): - if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST))) - goto ack_done; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; /* - * Get the number of bytes the message was padded by. + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { + dev->n_rdma_seq++; + ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); + goto ack_done; + } + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. */ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; /* * Check that the data size is >= 1 && <= pmtu. * Remember to account for the AETH header (4) and * ICRC (4). */ - if (unlikely(tlen <= (hdrsize + pad + 8))) { - /* XXX Need to generate an error CQ entry. */ - goto ack_done; - } + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; + read_last: tlen -= hdrsize + pad + 8; - if (unlikely(tlen != qp->s_len)) { - /* XXX Need to generate an error CQ entry. 
*/ - goto ack_done; - } + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; if (!header_in_data) aeth = be32_to_cpu(ohdr->u.aeth); else { aeth = be32_to_cpu(((__be32 *) data)[0]); data += sizeof(__be32); } - ipath_copy_sge(&qp->s_sge, data, tlen); - if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) { - /* - * Change the state so we contimue - * processing new requests and wake up the - * tasklet if there are posted sends. - */ - qp->s_state = OP(SEND_LAST); - if (qp->s_tail != qp->s_head) - tasklet_hi_schedule(&qp->s_task); - } + ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen); + (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST)); goto ack_done; } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); + goto bail; + +ack_op_err: + wc.status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_len_err: + wc.status = IB_WC_LOC_LEN_ERR; +ack_err: + wc.wr_id = wqe->wr.wr_id; + wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; + wc.vendor_err = 0; + wc.byte_len = 0; + wc.imm_data = 0; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.wc_flags = 0; + wc.pkey_index = 0; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.dlid_path_bits = 0; + wc.port_num = 0; + ipath_sqerror_qp(qp, &wc); bail: return; } @@ -1162,7 +1280,7 @@ bail: * incoming RC packet for the given QP. * Called at interrupt level. * Return 1 if no more processing is needed; otherwise return 0 to - * schedule a response to be sent and the s_lock unlocked. + * schedule a response to be sent. */ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, struct ipath_other_headers *ohdr, @@ -1173,25 +1291,23 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, int diff, int header_in_data) { - struct ib_reth *reth; + struct ipath_ack_entry *e; + u8 i, prev; + int old_req; if (diff > 0) { /* * Packet sequence error. * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or - * NAK is pending though. + * Don't queue the NAK if we already sent one. */ - if (qp->s_ack_state != OP(ACKNOWLEDGE) || - qp->r_nak_state != 0) - goto done; - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = OP(SEND_ONLY); + if (!qp->r_nak_state) { qp->r_nak_state = IB_NAK_PSN_ERROR; /* Use the expected PSN. */ qp->r_ack_psn = qp->r_psn; + goto send_ack; } - goto send_ack; + goto done; } /* @@ -1204,8 +1320,46 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, * can coalesce an outstanding duplicate ACK. We have to * send the earliest so that RDMA reads can be restarted at * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. */ - if (opcode == OP(RDMA_READ_REQUEST)) { + psn &= IPATH_PSN_MASK; + e = NULL; + old_req = 1; + spin_lock_irq(&qp->s_lock); + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = IPATH_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (ipath_cmp24(psn, e->psn) >= 0) + break; + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. 
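/*
 * A compact model of the duplicate-PSN search introduced above: walk
 * the ack queue backwards from the head until an entry whose starting
 * PSN is <= the duplicate PSN is found (24-bit circular compare).  The
 * driver also tracks whether the match lies behind s_tail_ack_queue
 * (old_req); that part is omitted here.  Names and types are ours.
 */
#include <stdint.h>
#include <stddef.h>

#define RING_SLOTS 5			/* IPATH_MAX_RDMA_ATOMIC + 1 */

struct ack_ent { uint8_t opcode; uint32_t psn; };

static int cmp24(uint32_t a, uint32_t b)
{
	return (int32_t)((a - b) << 8);
}

static struct ack_ent *find_dup(struct ack_ent *q, unsigned head,
				uint32_t psn)
{
	unsigned i = head, prev;

	for (;;) {
		prev = i ? i - 1 : RING_SLOTS - 1;
		if (prev == head || !q[prev].opcode)
			return NULL;	/* wrapped, or hit an unused slot */
		if (cmp24(psn, q[prev].psn) >= 0)
			return &q[prev];
		i = prev;
	}
}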
+ */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST) || old_req) + goto unlock_done; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1214,88 +1368,87 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev, data += sizeof(*reth); } /* - * If we receive a duplicate RDMA request, it means the - * requester saw a sequence error and needs to restart - * from an earlier point. We can abort the current - * RDMA read send in that case. + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. */ - spin_lock_irq(&qp->s_lock); - if (qp->s_ack_state != OP(ACKNOWLEDGE) && - (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) { - /* - * We are already sending earlier requested data. - * Don't abort it to send later out of sequence data. - */ - spin_unlock_irq(&qp->s_lock); - goto done; - } - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + offset = ((psn - e->psn) & IPATH_PSN_MASK) * + ib_mtu_enum_to_int(qp->path_mtu); + len = be32_to_cpu(reth->length); + if (unlikely(offset + len > e->rdma_sge.sge.sge_length)) + goto unlock_done; + if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; - /* - * Address range must be a subset of the original - * request and start on pmtu boundaries. - */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, + ok = ipath_rkey_ok(qp, &e->rdma_sge, + len, vaddr, rkey, IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); - goto done; - } + if (unlikely(!ok)) + goto unlock_done; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - tasklet_hi_schedule(&qp->s_task); - goto send_ack; + e->psn = psn; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; } - /* - * A pending RDMA read will ACK anything before it so - * ignore earlier duplicate requests. - */ - if (qp->s_ack_state != OP(ACKNOWLEDGE)) - goto done; - - /* - * If an ACK is pending, don't replace the pending ACK - * with an earlier one since the later one will ACK the earlier. - * Also, if we already have a pending atomic, send it. - */ - if (qp->r_ack_state != OP(ACKNOWLEDGE) && - (ipath_cmp24(psn, qp->r_ack_psn) <= 0 || - qp->r_ack_state >= OP(COMPARE_SWAP))) - goto send_ack; - switch (opcode) { case OP(COMPARE_SWAP): - case OP(FETCH_ADD): + case OP(FETCH_ADD): { /* - * Check for the PSN of the last atomic operation - * performed and resend the result if found. + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. 
*/ - if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn) - goto done; + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = prev; + break; + } + + default: + if (old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irq(&qp->s_lock); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. + */ + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_tail_ack_queue = i; break; } - qp->r_ack_state = opcode; qp->r_nak_state = 0; - qp->r_ack_psn = psn; -send_ack: - return 0; + spin_unlock_irq(&qp->s_lock); + tasklet_hi_schedule(&qp->s_task); +unlock_done: + spin_unlock_irq(&qp->s_lock); done: return 1; + +send_ack: + return 0; } static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) @@ -1391,15 +1544,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, opcode == OP(SEND_LAST_WITH_IMMEDIATE)) break; nack_inv: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. - */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR); - qp->r_ack_state = OP(SEND_ONLY); qp->r_nak_state = IB_NAK_INVALID_REQUEST; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1441,9 +1586,8 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, * Don't queue the NAK if a RDMA read or atomic * is pending though. */ - if (qp->r_ack_state >= OP(COMPARE_SWAP)) - goto send_ack; - qp->r_ack_state = OP(SEND_ONLY); + if (qp->r_nak_state) + goto done; qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; qp->r_ack_psn = qp->r_psn; goto send_ack; @@ -1567,7 +1711,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, goto rnr_nak; goto send_last_imm; - case OP(RDMA_READ_REQUEST): + case OP(RDMA_READ_REQUEST): { + struct ipath_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; + e = &qp->s_ack_queue[qp->r_head_ack_queue]; /* RETH comes after BTH */ if (!header_in_data) reth = &ohdr->u.rc.reth; @@ -1575,72 +1731,75 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, reth = (struct ib_reth *)data; data += sizeof(*reth); } - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto nack_acc; - spin_lock_irq(&qp->s_lock); - qp->s_rdma_len = be32_to_cpu(reth->length); - if (qp->s_rdma_len != 0) { + len = be32_to_cpu(reth->length); + if (len) { u32 rkey = be32_to_cpu(reth->rkey); u64 vaddr = be64_to_cpu(reth->vaddr); int ok; /* Check rkey & NAK */ - ok = ipath_rkey_ok(qp, &qp->s_rdma_sge, - qp->s_rdma_len, vaddr, rkey, - IB_ACCESS_REMOTE_READ); - if (unlikely(!ok)) { - spin_unlock_irq(&qp->s_lock); + ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) goto nack_acc; - } /* * Update the next expected PSN. We add 1 later * below, so only add the remainder here. 
*/ - if (qp->s_rdma_len > pmtu) - qp->r_psn += (qp->s_rdma_len - 1) / pmtu; + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; } else { - qp->s_rdma_sge.sg_list = NULL; - qp->s_rdma_sge.num_sge = 0; - qp->s_rdma_sge.sge.mr = NULL; - qp->s_rdma_sge.sge.vaddr = NULL; - qp->s_rdma_sge.sge.length = 0; - qp->s_rdma_sge.sge.sge_length = 0; + e->rdma_sge.sg_list = NULL; + e->rdma_sge.num_sge = 0; + e->rdma_sge.sge.mr = NULL; + e->rdma_sge.sge.vaddr = NULL; + e->rdma_sge.sge.length = 0; + e->rdma_sge.sge.sge_length = 0; } + e->opcode = opcode; + e->psn = psn; /* * We need to increment the MSN here instead of when we * finish sending the result since a duplicate request would * increment it more than once. */ qp->r_msn++; - - qp->s_ack_state = opcode; - qp->s_ack_psn = psn; - spin_unlock_irq(&qp->s_lock); - qp->r_psn++; qp->r_state = opcode; qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; /* Call ipath_do_rc_send() in another thread. */ tasklet_hi_schedule(&qp->s_task); goto done; + } case OP(COMPARE_SWAP): case OP(FETCH_ADD): { struct ib_atomic_eth *ateth; + struct ipath_ack_entry *e; u64 vaddr; + atomic64_t *maddr; u64 sdata; u32 rkey; + u8 next; + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc; + next = qp->r_head_ack_queue + 1; + if (next > IPATH_MAX_RDMA_ATOMIC) + next = 0; + if (unlikely(next == qp->s_tail_ack_queue)) + goto nack_inv; if (!header_in_data) ateth = &ohdr->u.atomic_eth; - else { + else ateth = (struct ib_atomic_eth *)data; - data += sizeof(*ateth); - } - vaddr = be64_to_cpu(ateth->vaddr); + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv; rkey = be32_to_cpu(ateth->rkey); @@ -1649,63 +1808,50 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, sizeof(u64), vaddr, rkey, IB_ACCESS_REMOTE_ATOMIC))) goto nack_acc; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_ATOMIC))) - goto nack_acc; /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; sdata = be64_to_cpu(ateth->swap_data); - spin_lock_irq(&dev->pending_lock); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (opcode == OP(FETCH_ADD)) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == - be64_to_cpu(ateth->compare_data)) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irq(&dev->pending_lock); + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + e->opcode = opcode; + e->psn = psn & IPATH_PSN_MASK; qp->r_msn++; - qp->r_atomic_psn = psn & IPATH_PSN_MASK; - psn |= 1 << 31; - break; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + barrier(); + qp->r_head_ack_queue = next; + + /* Call ipath_do_rc_send() in another thread. */ + tasklet_hi_schedule(&qp->s_task); + + goto done; } default: - /* Drop packet for unknown opcodes. */ - goto done; + /* NAK unknown opcodes. */ + goto nack_inv; } qp->r_psn++; qp->r_state = opcode; + qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & (1 << 31)) { - /* - * Coalesce ACKs unless there is a RDMA READ or - * ATOMIC pending. 
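/*
 * The responder-side atomics above in isolation: FETCH_ADD must return
 * the value before the add, hence the "- sdata" after
 * atomic64_add_return(), and COMPARE_SWAP returns whatever cmpxchg()
 * saw, swap or no swap.  Doing the ops with real atomic instructions
 * instead of a driver lock is what lets ipath_query_device() report
 * IB_ATOMIC_GLOB further below.  Kernel-style sketch (the driver's
 * targets are 64-bit, so cmpxchg() on a u64 is fine); names are ours:
 */
#include <linux/types.h>
#include <linux/atomic.h>

static u64 do_fetch_add(u64 *target, u64 add)
{
	return (u64)atomic64_add_return(add, (atomic64_t *)target) - add;
}

static u64 do_cmp_swap(u64 *target, u64 compare, u64 swap)
{
	return cmpxchg(target, compare, swap);	/* returns the old value */
}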
- */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - qp->r_ack_state = opcode; - qp->r_ack_psn = psn; - } + if (psn & (1 << 31)) goto send_ack; - } goto done; nack_acc: - /* - * A NAK will ACK earlier sends and RDMA writes. - * Don't queue the NAK if a RDMA read, atomic, or NAK - * is pending though. - */ - if (qp->r_ack_state < OP(COMPARE_SWAP)) { - ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); - qp->r_ack_state = OP(RDMA_WRITE_ONLY); - qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; - qp->r_ack_psn = qp->r_psn; - } + ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; + send_ack: - /* Send ACK right away unless the send tasklet has a pending ACK. */ - if (qp->s_ack_state == OP(ACKNOWLEDGE)) - send_rc_ack(qp); + send_rc_ack(qp); done: return; diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index dffc76016d3..c182bcd6209 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h @@ -126,9 +126,18 @@ #define INFINIPATH_E_RESET 0x0004000000000000ULL #define INFINIPATH_E_HARDWARE 0x0008000000000000ULL +/* + * this is used to print "common" packet errors only when the + * __IPATH_ERRPKTDBG bit is set in ipath_debug. + */ +#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \ + | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \ + | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \ + | INFINIPATH_E_REBP ) + /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ /* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo - * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: eagerTID, 3: expTID + * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID * bit 4: flag buffer, 5: datainfo, 6: header info */ #define INFINIPATH_HWE_TXEMEMPARITYERR_MASK 0xFULL #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40 @@ -143,8 +152,8 @@ /* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */ #define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF 0x01ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ 0x02ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL -#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x08ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID 0x04ULL +#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x08ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF 0x10ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL #define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO 0x40ULL @@ -299,13 +308,6 @@ #define INFINIPATH_XGXS_RX_POL_SHIFT 19 #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL -#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ - -/* TID entries (memory), HT-only */ -#define INFINIPATH_RT_VALID 0x8000000000000000ULL -#define INFINIPATH_RT_ADDR_SHIFT 0 -#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF -#define INFINIPATH_RT_BUFSIZE_SHIFT 48 /* * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index e86cb171872..d9c2a9b15d8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -202,6 +202,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wq->tail = tail; ret = 1; + qp->r_wrid_valid = 1; if (handler) { u32 n; @@ -229,7 +230,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) } } spin_unlock_irqrestore(&rq->lock, flags); - qp->r_wrid_valid = 1; bail: return ret; @@ -255,6 +255,7 @@ static void ipath_ruc_loopback(struct ipath_qp 
*sqp) unsigned long flags; struct ib_wc wc; u64 sdata; + atomic64_t *maddr; qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); if (!qp) { @@ -265,7 +266,8 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp) again: spin_lock_irqsave(&sqp->s_lock, flags); - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) { + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || + qp->s_rnr_timeout) { spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } @@ -310,7 +312,7 @@ again: sqp->s_rnr_retry--; dev->n_rnr_naks++; sqp->s_rnr_timeout = - ib_ipath_rnr_table[sqp->r_min_rnr_timer]; + ib_ipath_rnr_table[qp->r_min_rnr_timer]; ipath_insert_rnr_queue(sqp); goto done; } @@ -343,20 +345,22 @@ again: wc.sl = sqp->remote_ah_attr.sl; wc.dlid_path_bits = 0; wc.port_num = 0; + spin_lock_irqsave(&sqp->s_lock, flags); ipath_sqerror_qp(sqp, &wc); + spin_unlock_irqrestore(&sqp->s_lock, flags); goto done; } break; case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_READ))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, wqe->wr.wr.rdma.remote_addr, wqe->wr.wr.rdma.rkey, IB_ACCESS_REMOTE_READ))) goto acc_err; - if (unlikely(!(qp->qp_access_flags & - IB_ACCESS_REMOTE_READ))) - goto acc_err; qp->r_sge.sge = wqe->sg_list[0]; qp->r_sge.sg_list = wqe->sg_list + 1; qp->r_sge.num_sge = wqe->wr.num_sge; @@ -364,22 +368,22 @@ again: case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), - wqe->wr.wr.rdma.remote_addr, - wqe->wr.wr.rdma.rkey, + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, IB_ACCESS_REMOTE_ATOMIC))) goto acc_err; /* Perform atomic OP and save result. */ - sdata = wqe->wr.wr.atomic.swap; - spin_lock_irqsave(&dev->pending_lock, flags); - qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr; - if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) - *(u64 *) qp->r_sge.sge.vaddr = - qp->r_atomic_data + sdata; - else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add) - *(u64 *) qp->r_sge.sge.vaddr = sdata; - spin_unlock_irqrestore(&dev->pending_lock, flags); - *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data; + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); goto send_comp; default: @@ -440,7 +444,7 @@ again: send_comp: sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) || + if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wqe->wr.wr_id; wc.status = IB_WC_SUCCESS; @@ -502,7 +506,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev) * We clear the tasklet flag now since we are committing to return * from the tasklet function. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); tasklet_unlock(&qp->s_task); want_buffer(dev->dd); dev->n_piowait++; @@ -541,6 +545,9 @@ int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr) wr->sg_list[0].addr & (sizeof(u64) - 1))) { ret = -EINVAL; goto bail; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + ret = -EINVAL; + goto bail; } /* IB spec says that num_sge == 0 is OK. 
*/ if (wr->num_sge > qp->s_max_sge) { @@ -647,7 +654,7 @@ void ipath_do_ruc_send(unsigned long data) u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); struct ipath_other_headers *ohdr; - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) + if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) goto bail; if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) { @@ -683,19 +690,15 @@ again: */ spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ - if (qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE && - (bth0 = ipath_make_rc_ack(qp, ohdr, pmtu)) != 0) - bth2 = qp->s_ack_psn++ & IPATH_PSN_MASK; - else if (!((qp->ibqp.qp_type == IB_QPT_RC) ? - ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : - ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { + if (!((qp->ibqp.qp_type == IB_QPT_RC) ? + ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) : + ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) { /* * Clear the busy bit before unlocking to avoid races with * adding new work queue items and then failing to process * them. */ - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); spin_unlock_irqrestore(&qp->s_lock, flags); goto bail; } @@ -728,7 +731,7 @@ again: goto again; clear: - clear_bit(IPATH_S_BUSY, &qp->s_flags); + clear_bit(IPATH_S_BUSY, &qp->s_busy); bail: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 30a825928fc..9307f7187ca 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -207,7 +207,7 @@ void ipath_get_faststats(unsigned long opaque) * don't access the chip while running diags, or memory diags can * fail */ - if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT) || + if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_INITTED) || ipath_diag_inuse) /* but re-arm the timer, for diags case; won't hurt other */ goto done; @@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque) if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) && time_after(jiffies, dd->ipath_unmasktime)) { char ebuf[256]; - ipath_decode_err(ebuf, sizeof ebuf, + int iserr; + iserr = ipath_decode_err(ebuf, sizeof ebuf, (dd->ipath_maskederrs & ~dd-> ipath_ignorederrs)); if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & - ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) + ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | + INFINIPATH_E_PKTERRS )) ipath_dev_err(dd, "Re-enabling masked errors " "(%s)\n", ebuf); else { @@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque) * them. So only complain about these at debug * level. 
*/ - ipath_dbg("Disabling frequent queue full errors " - "(%s)\n", ebuf); + if (iserr) + ipath_dbg("Re-enabling queue full errors (%s)\n", + ebuf); + else + ipath_cdbg(ERRPKT, "Re-enabling packet" + " problem interrupt (%s)\n", ebuf); } dd->ipath_maskederrs = dd->ipath_ignorederrs; ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 325d6634ff5..1c2b03c2ef5 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -42,7 +42,7 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe, { if (++qp->s_last == qp->s_size) qp->s_last = 0; - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED)) { wc->wr_id = wqe->wr.wr_id; wc->status = IB_WC_SUCCESS; @@ -344,13 +344,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, send_first: if (qp->r_reuse_sge) { qp->r_reuse_sge = 0; - qp->r_sge = qp->s_rdma_sge; + qp->r_sge = qp->s_rdma_read_sge; } else if (!ipath_get_rwqe(qp, 0)) { dev->n_pkt_drops++; goto done; } /* Save the WQE so we can reuse it in case of an error. */ - qp->s_rdma_sge = qp->r_sge; + qp->s_rdma_read_sge = qp->r_sge; qp->r_rcv_len = 0; if (opcode == OP(SEND_ONLY)) goto send_last; diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 9a3e54664ee..a518f7c8fa8 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -308,6 +308,11 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) goto bail; } + if (wr->wr.ud.ah->pd != qp->ibqp.pd) { + ret = -EPERM; + goto bail; + } + /* IB spec says that num_sge == 0 is OK. */ if (wr->num_sge > qp->s_max_sge) { ret = -EINVAL; @@ -467,7 +472,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr) done: /* Queue the completion status entry. */ - if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) || + if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || (wr->send_flags & IB_SEND_SIGNALED)) { wc.wr_id = wr->wr_id; wc.status = IB_WC_SUCCESS; @@ -647,6 +652,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); ipath_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh)); + qp->r_wrid_valid = 0; wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; wc.opcode = IB_WC_RECV; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 2aaacdb7e52..18c6df2052c 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -438,6 +438,10 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, struct ipath_mcast *mcast; struct ipath_mcast_qp *p; + if (lnh != IPATH_LRH_GRH) { + dev->n_pkt_drops++; + goto bail; + } mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); if (mcast == NULL) { dev->n_pkt_drops++; @@ -445,8 +449,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, } dev->n_multicast_rcv++; list_for_each_entry_rcu(p, &mcast->qp_list, list) - ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, - tlen, p->qp); + ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); /* * Notify ipath_multicast_detach() if it is waiting for us * to finish. 
@@ -773,7 +776,6 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords, /* +1 is for the qword padding of pbc */ plen = hdrwords + ((len + 3) >> 2) + 1; if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) { - ipath_dbg("packet len 0x%x too long, failing\n", plen); ret = -EINVAL; goto bail; } @@ -980,14 +982,14 @@ static int ipath_query_device(struct ib_device *ibdev, props->max_cqe = ib_ipath_max_cqes; props->max_mr = dev->lk_table.max; props->max_pd = ib_ipath_max_pds; - props->max_qp_rd_atom = 1; - props->max_qp_init_rd_atom = 1; + props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; /* props->max_res_rd_atom */ props->max_srq = ib_ipath_max_srqs; props->max_srq_wr = ib_ipath_max_srq_wrs; props->max_srq_sge = ib_ipath_max_srq_sges; /* props->local_ca_ack_delay */ - props->atomic_cap = IB_ATOMIC_HCA; + props->atomic_cap = IB_ATOMIC_GLOB; props->max_pkeys = ipath_get_npkeys(dev->dd); props->max_mcast_grp = ib_ipath_max_mcast_grps; props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; @@ -1557,7 +1559,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd) dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->dma_device = &dd->pcidev->dev; - dev->class_dev.dev = dev->dma_device; dev->query_device = ipath_query_device; dev->modify_device = ipath_modify_device; dev->query_port = ipath_query_port; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index c0c8d5b24a7..7c4929f1cb5 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -40,9 +40,12 @@ #include <linux/interrupt.h> #include <linux/kref.h> #include <rdma/ib_pack.h> +#include <rdma/ib_user_verbs.h> #include "ipath_layer.h" +#define IPATH_MAX_RDMA_ATOMIC 4 + #define QPN_MAX (1 << 24) #define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) @@ -89,7 +92,7 @@ struct ib_reth { } __attribute__ ((packed)); struct ib_atomic_eth { - __be64 vaddr; + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ __be32 rkey; __be64 swap_data; __be64 compare_data; @@ -108,7 +111,7 @@ struct ipath_other_headers { } rc; struct { __be32 aeth; - __be64 atomic_ack_eth; + __be32 atomic_ack_eth[2]; } at; __be32 imm_data; __be32 aeth; @@ -186,7 +189,7 @@ struct ipath_mmap_info { struct ipath_cq_wc { u32 head; /* index of next entry to fill */ u32 tail; /* index of next ib_poll_cq() entry */ - struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */ }; /* @@ -312,6 +315,19 @@ struct ipath_sge_state { }; /* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct ipath_ack_entry { + u8 opcode; + u32 psn; + union { + struct ipath_sge_state rdma_sge; + u64 atomic_data; + }; +}; + +/* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). * Variables prefixed with ack_ are for responder replies. 
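[Aside, not part of the patch: the new s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1] array in struct ipath_qp, indexed by r_head_ack_queue and s_tail_ack_queue, is a classic ring with one spare slot: head == tail means empty, yet up to IPATH_MAX_RDMA_ATOMIC responses can be outstanding. A self-contained userspace sketch of the indexing, helper names invented; it also shows why ib_atomic_eth now carries the 64-bit VA as two 32-bit words:]

#include <arpa/inet.h>
#include <stdint.h>

#define MAX_RD_ATOMIC	4			/* mirrors IPATH_MAX_RDMA_ATOMIC */
#define ACKQ_SLOTS	(MAX_RD_ATOMIC + 1)	/* one spare slot */

struct ack_entry {				/* like struct ipath_ack_entry */
	uint8_t  opcode;
	uint32_t psn;
};

struct ackq {
	struct ack_entry q[ACKQ_SLOTS];
	unsigned head;				/* like r_head_ack_queue */
	unsigned tail;				/* like s_tail_ack_queue */
};

static unsigned ackq_next(unsigned i)
{
	return i + 1 == ACKQ_SLOTS ? 0 : i + 1;
}

/* Full means MAX_RD_ATOMIC responses are already queued, so the
 * responder must NAK a new RDMA read/atomic; empty means nothing
 * is left for the send tasklet to reply with. */
static int ackq_full(const struct ackq *a)  { return ackq_next(a->head) == a->tail; }
static int ackq_empty(const struct ackq *a) { return a->head == a->tail; }

/* The atomic ETH sits on a 4-byte (not 8-byte) boundary in the packet,
 * hence __be32 vaddr[2]; the 64-bit VA is reassembled like this: */
static uint64_t ateth_vaddr(const uint32_t be_vaddr[2])
{
	return ((uint64_t)ntohl(be_vaddr[0]) << 32) | ntohl(be_vaddr[1]);
}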
@@ -333,24 +349,24 @@ struct ipath_qp { struct ipath_mmap_info *ip; struct ipath_sge_state *s_cur_sge; struct ipath_sge_state s_sge; /* current send request data */ - /* current RDMA read send data */ - struct ipath_sge_state s_rdma_sge; + struct ipath_ack_entry s_ack_queue[IPATH_MAX_RDMA_ATOMIC + 1]; + struct ipath_sge_state s_ack_rdma_sge; + struct ipath_sge_state s_rdma_read_sge; struct ipath_sge_state r_sge; /* current receive data */ spinlock_t s_lock; - unsigned long s_flags; + unsigned long s_busy; u32 s_hdrwords; /* size of s_hdr in 32 bit words */ u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ - u32 s_rdma_len; /* total length of s_rdma_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_psn; /* current packet sequence number */ - u32 s_ack_psn; /* PSN for RDMA_READ */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u64 r_wr_id; /* ID for current receive WQE */ - u64 r_atomic_data; /* data for last atomic op */ - u32 r_atomic_psn; /* PSN of last atomic op */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_psn; /* expected rcv packet sequence number */ @@ -360,12 +376,13 @@ struct ipath_qp { u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ u8 r_state; /* opcode of last packet received */ - u8 r_ack_state; /* opcode of packet to ACK */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 r_reuse_sge; /* for UC receive errors */ u8 r_sge_inx; /* current index into sg_list */ u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 r_head_ack_queue; /* index into s_ack_queue[] */ u8 qp_access_flags; u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_retry_cnt; /* number of times to retry */ @@ -374,6 +391,10 @@ struct ipath_qp { u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + u8 s_flags; u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; u32 remote_qpn; @@ -390,11 +411,16 @@ struct ipath_qp { struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; +/* Bit definition for s_busy. */ +#define IPATH_S_BUSY 0 + /* * Bit definitions for s_flags. 
*/ -#define IPATH_S_BUSY 0 -#define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_S_SIGNAL_REQ_WR 0x01 +#define IPATH_S_FENCE_PENDING 0x02 +#define IPATH_S_RDMAR_PENDING 0x04 +#define IPATH_S_ACK_PENDING 0x08 #define IPATH_PSN_CREDIT 2048 @@ -706,8 +732,6 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int ipath_destroy_srq(struct ib_srq *ibsrq); -void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); - int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, @@ -757,9 +781,6 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr, void ipath_do_ruc_send(unsigned long data); -u32 ipath_make_rc_ack(struct ipath_qp *qp, struct ipath_other_headers *ohdr, - u32 pmtu); - int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, u32 pmtu, u32 *bth0p, u32 *bth2p); diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 0d9b7d06bbc..773145e2994 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1013,14 +1013,14 @@ static struct { u64 latest_fw; u32 flags; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 4, 0), + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 5, 0), .flags = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 600), + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 8, 200), .flags = MTHCA_FLAG_PCIE }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 400), + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE }, - [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 1, 0), + [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 2, 0), .flags = MTHCA_FLAG_MEMFREE | MTHCA_FLAG_PCIE | MTHCA_FLAG_SINAI_OPT } @@ -1135,7 +1135,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_cmd; if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) { - mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", + mthca_warn(mdev, "HCA FW version %d.%d.%3d is old (%d.%d.%3d is current).\n", (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), (int) (mthca_hca_table[hca_type].latest_fw >> 32), diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index ee561c569d5..aa6c70a6a36 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -297,7 +297,8 @@ out: int mthca_write_mtt_size(struct mthca_dev *dev) { - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) /* * Be friendly to WRITE_MTT command * and leave two empty slots for the @@ -355,7 +356,8 @@ int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, int size = mthca_write_mtt_size(dev); int chunk; - if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy) + if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy || + !(dev->mthca_flags & MTHCA_FLAG_FMR)) return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len); while (list_len > 0) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 0725ad7ad9b..47e6fd46d9c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1293,7 +1293,6 @@ int mthca_register_device(struct mthca_dev *dev) 
dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; - dev->ib_dev.class_dev.dev = &dev->pdev->dev; dev->ib_dev.query_device = mthca_query_device; dev->ib_dev.query_port = mthca_query_port; dev->ib_dev.modify_device = mthca_modify_device; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 1c6b63aca26..8fe6fee7a97 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1419,11 +1419,10 @@ void mthca_free_qp(struct mthca_dev *dev, * unref the mem-free tables and free the QPN in our table. */ if (!qp->ibqp.uobject) { - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, + mthca_cq_clean(dev, recv_cq, qp->qpn, qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, - qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + mthca_cq_clean(dev, send_cq, qp->qpn, NULL); mthca_free_memfree(dev, qp); mthca_free_wqe_buf(dev, qp); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 2b242a4823f..0c4e59b906c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -228,7 +228,6 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_rx *p; - unsigned long flags; unsigned psn; int ret; @@ -257,9 +256,9 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even cm_id->context = p; p->jiffies = jiffies; - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); list_add(&p->list, &priv->cm.passive_ids); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); queue_delayed_work(ipoib_workqueue, &priv->cm.stale_task, IPOIB_CM_RX_DELAY); return 0; @@ -277,7 +276,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, { struct ipoib_cm_rx *p; struct ipoib_dev_priv *priv; - unsigned long flags; int ret; switch (event->event) { @@ -290,14 +288,14 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: p = cm_id->context; priv = netdev_priv(p->dev); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); if (list_empty(&p->list)) ret = 0; /* Connection is going away already. 
*/ else { list_del_init(&p->list); ret = -ECONNRESET; } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); if (ret) { ib_destroy_qp(p->qp); kfree(p); @@ -351,8 +349,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) u64 mapping[IPOIB_CM_RX_SG]; int frags; - ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n", - wr_id, wc->opcode, wc->status); + ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", + wr_id, wc->status); if (unlikely(wr_id >= ipoib_recvq_size)) { ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", @@ -408,7 +406,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); dev->last_rx = jiffies; @@ -504,8 +502,8 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx struct ipoib_tx_buf *tx_req; unsigned long flags; - ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n", - wr_id, wc->opcode, wc->status); + ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", + wr_id, wc->status); if (unlikely(wr_id >= ipoib_sendq_size)) { ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", @@ -612,23 +610,22 @@ void ipoib_cm_dev_stop(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_rx *p; - unsigned long flags; if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) return; ib_destroy_cm_id(priv->cm.id); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); while (!list_empty(&priv->cm.passive_ids)) { p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); list_del_init(&p->list); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); ib_destroy_cm_id(p->id); ib_destroy_qp(p->qp); kfree(p); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); cancel_delayed_work(&priv->cm.stale_task); } @@ -642,7 +639,6 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even struct ib_qp_attr qp_attr; int qp_attr_mask, ret; struct sk_buff *skb; - unsigned long flags; p->mtu = be32_to_cpu(data->mtu); @@ -680,12 +676,12 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even skb_queue_head_init(&skqueue); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); set_bit(IPOIB_FLAG_OPER_UP, &p->flags); if (p->neigh) while ((skb = __skb_dequeue(&p->neigh->queue))) __skb_queue_tail(&skqueue, skb); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); while ((skb = __skb_dequeue(&skqueue))) { skb->dev = p->dev; @@ -895,7 +891,6 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_dev_priv *priv = netdev_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; - unsigned long flags; int ret; switch (event->event) { @@ -914,7 +909,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: case IB_CM_TIMEWAIT_EXIT: ipoib_dbg(priv, "CM error %d.\n", event->event); - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irq(&priv->tx_lock); spin_lock(&priv->lock); neigh = tx->neigh; @@ -934,7 +929,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, } spin_unlock(&priv->lock); - 
spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irq(&priv->tx_lock); break; default: break; @@ -1023,21 +1018,20 @@ static void ipoib_cm_tx_reap(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.reap_task); struct ipoib_cm_tx *p; - unsigned long flags; - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irq(&priv->tx_lock); spin_lock(&priv->lock); while (!list_empty(&priv->cm.reap_list)) { p = list_entry(priv->cm.reap_list.next, typeof(*p), list); list_del(&p->list); spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irq(&priv->tx_lock); ipoib_cm_tx_destroy(p); - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irq(&priv->tx_lock); spin_lock(&priv->lock); } spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irq(&priv->tx_lock); } static void ipoib_cm_skb_reap(struct work_struct *work) @@ -1046,15 +1040,14 @@ static void ipoib_cm_skb_reap(struct work_struct *work) cm.skb_task); struct net_device *dev = priv->dev; struct sk_buff *skb; - unsigned long flags; unsigned mtu = priv->mcast_mtu; - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irq(&priv->tx_lock); spin_lock(&priv->lock); while ((skb = skb_dequeue(&priv->cm.skb_queue))) { spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irq(&priv->tx_lock); if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -1062,11 +1055,11 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); #endif dev_kfree_skb_any(skb); - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irq(&priv->tx_lock); spin_lock(&priv->lock); } spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irq(&priv->tx_lock); } void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb, @@ -1088,9 +1081,8 @@ static void ipoib_cm_stale_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.stale_task.work); struct ipoib_cm_rx *p; - unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); while (!list_empty(&priv->cm.passive_ids)) { /* List if sorted by LRU, start from tail, * stop when we see a recently used entry */ @@ -1098,13 +1090,13 @@ static void ipoib_cm_stale_task(struct work_struct *work) if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) break; list_del_init(&p->list); - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); ib_destroy_cm_id(p->id); ib_destroy_qp(p->qp); kfree(p); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ba0ee5cf2ad..1bdb9101911 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -172,8 +172,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) struct sk_buff *skb; u64 addr; - ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n", - wr_id, wc->opcode, wc->status); + ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n", + wr_id, wc->status); if (unlikely(wr_id >= ipoib_recvq_size)) { ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n", 
@@ -216,7 +216,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->slid != priv->local_lid || wc->src_qp != priv->qp->qp_num) { skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); dev->last_rx = jiffies; @@ -245,8 +245,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) struct ipoib_tx_buf *tx_req; unsigned long flags; - ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n", - wr_id, wc->opcode, wc->status); + ipoib_dbg_data(priv, "send completion: id %d, status: %d\n", + wr_id, wc->status); if (unlikely(wr_id >= ipoib_sendq_size)) { ipoib_warn(priv, "send completion event with wrid %d (> %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f2a40ae8e7d..b4c380c5a3b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -395,14 +395,10 @@ static void path_rec_completion(int status, skb_queue_head_init(&skqueue); if (!status) { - struct ib_ah_attr av = { - .dlid = be16_to_cpu(pathrec->dlid), - .sl = pathrec->sl, - .port_num = priv->port, - .static_rate = pathrec->rate - }; - - ah = ipoib_create_ah(dev, priv->pd, &av); + struct ib_ah_attr av; + + if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) + ah = ipoib_create_ah(dev, priv->pd, &av); } spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c index e3e5c139907..ee2b0b9f8f4 100644 --- a/drivers/isdn/act2000/module.c +++ b/drivers/isdn/act2000/module.c @@ -442,7 +442,7 @@ act2000_sendbuf(act2000_card *card, int channel, int ack, struct sk_buff *skb) return 0; } skb_reserve(xmit_skb, 19); - memcpy(skb_put(xmit_skb, len), skb->data, len); + skb_copy_from_linear_data(skb, skb_put(xmit_skb, len), len); } else { xmit_skb = skb_clone(skb, GFP_ATOMIC); if (!xmit_skb) { diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 2baef349c12..c8e1c357cec 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -652,7 +652,7 @@ static int write_modem(struct cardstate *cs) * transmit data */ count = min(bcs->tx_skb->len, (unsigned) ucs->bulk_out_size); - memcpy(ucs->bulk_out_buffer, bcs->tx_skb->data, count); + skb_copy_from_linear_data(bcs->tx_skb, ucs->bulk_out_buffer, count); skb_pull(bcs->tx_skb, count); atomic_set(&ucs->busy, 1); gig_dbg(DEBUG_OUTPUT, "write_modem: send %d bytes", count); diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 1e2d38e3d68..428872b653e 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -404,7 +404,8 @@ static void b1dma_dispatch_tx(avmcard *card) printk(KERN_DEBUG "tx: put 0x%x len=%d\n", skb->data[2], txlen); #endif - memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2); + skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf, + skb->len - 2); } txlen = (txlen + 3) & ~3; diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 6f5efa8d78c..d58f927e766 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -457,7 +457,8 @@ static void c4_dispatch_tx(avmcard *card) printk(KERN_DEBUG "%s: tx put 0x%x len=%d\n", card->name, skb->data[2], txlen); #endif - memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2); + skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf, 
+ skb->len - 2); } txlen = (txlen + 3) & ~3; diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c index ae377e81277..1642dca988a 100644 --- a/drivers/isdn/hisax/elsa_ser.c +++ b/drivers/isdn/hisax/elsa_ser.c @@ -254,14 +254,16 @@ write_modem(struct BCState *bcs) { count = len; if (count > MAX_MODEM_BUF - fp) { count = MAX_MODEM_BUF - fp; - memcpy(cs->hw.elsa.transbuf + fp, bcs->tx_skb->data, count); + skb_copy_from_linear_data(bcs->tx_skb, + cs->hw.elsa.transbuf + fp, count); skb_pull(bcs->tx_skb, count); cs->hw.elsa.transcnt += count; ret = count; count = len - count; fp = 0; } - memcpy((cs->hw.elsa.transbuf + fp), bcs->tx_skb->data, count); + skb_copy_from_linear_data(bcs->tx_skb, + cs->hw.elsa.transbuf + fp, count); skb_pull(bcs->tx_skb, count); cs->hw.elsa.transcnt += count; ret += count; diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c index cd3b5ad5349..3446f249d67 100644 --- a/drivers/isdn/hisax/isdnl2.c +++ b/drivers/isdn/hisax/isdnl2.c @@ -1293,7 +1293,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg) oskb = skb; skb = alloc_skb(oskb->len + i, GFP_ATOMIC); memcpy(skb_put(skb, i), header, i); - memcpy(skb_put(skb, oskb->len), oskb->data, oskb->len); + skb_copy_from_linear_data(oskb, + skb_put(skb, oskb->len), oskb->len); dev_kfree_skb(oskb); } st->l2.l2l1(st, PH_PULL | INDICATION, skb); diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index b2ae4ec1e49..f85450146bd 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -398,8 +398,9 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) _len = CAPIMSG_LEN(skb->data); if (_len > 22) { _len2 = _len - 22; - memcpy(msghead, skb->data, 22); - memcpy(skb->data + _len2, msghead, 22); + skb_copy_from_linear_data(skb, msghead, 22); + skb_copy_to_linear_data_offset(skb, _len2, + msghead, 22); skb_pull(skb, _len2); CAPIMSG_SETLEN(skb->data, 22); retval = capilib_data_b3_req(&cinfo->ncci_head, diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c index 557d96c78a6..cfa8fa5e44a 100644 --- a/drivers/isdn/hysdn/hysdn_net.c +++ b/drivers/isdn/hysdn/hysdn_net.c @@ -214,8 +214,6 @@ hysdn_rx_netpkt(hysdn_card * card, unsigned char *buf, unsigned short len) lp->stats.rx_dropped++; return; } - skb->dev = &lp->netdev; - /* copy the data */ memcpy(skb_put(skb, len), buf, len); diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c index b7b5aa4748a..81db4a190d4 100644 --- a/drivers/isdn/hysdn/hysdn_sched.c +++ b/drivers/isdn/hysdn/hysdn_sched.c @@ -113,7 +113,8 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf, (skb = hysdn_tx_netget(card)) != NULL) { if (skb->len <= maxlen) { - memcpy(buf, skb->data, skb->len); /* copy the packet to the buffer */ + /* copy the packet to the buffer */ + skb_copy_from_linear_data(skb, buf, skb->len); *len = skb->len; *chan = CHAN_NDIS_DATA; card->net_tx_busy = 1; /* we are busy sending network data */ @@ -126,7 +127,7 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf, ((skb = hycapi_tx_capiget(card)) != NULL) ) { if (skb->len <= maxlen) { - memcpy(buf, skb->data, skb->len); + skb_copy_from_linear_data(skb, buf, skb->len); *len = skb->len; *chan = CHAN_CAPI; hycapi_tx_capiack(card); diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c index 9c926e41b11..c97330b1987 100644 --- a/drivers/isdn/i4l/isdn_common.c +++ b/drivers/isdn/i4l/isdn_common.c @@ -829,7 +829,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char 
* fp, int len, wait_que dflag = 0; } count_put = count_pull; - memcpy(cp, skb->data, count_put); + skb_copy_from_linear_data(skb, cp, count_put); cp += count_put; len -= count_put; #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 838b3734e2b..aa83277aba7 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -872,7 +872,8 @@ typedef struct { static void isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) { - u_char *p = skb->nh.raw; /* hopefully, this was set correctly */ + /* hopefully, this was set correctly */ + const u_char *p = skb_network_header(skb); unsigned short proto = ntohs(skb->protocol); int data_ofs; ip_ports *ipp; @@ -880,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) addinfo[0] = '\0'; /* This check stolen from 2.1.72 dev_queue_xmit_nit() */ - if (skb->nh.raw < skb->data || skb->nh.raw >= skb->tail) { + if (p < skb->data || skb->network_header >= skb->tail) { /* fall back to old isdn_net_log_packet method() */ char * buf = skb->data; @@ -1121,7 +1122,7 @@ isdn_net_adjust_hdr(struct sk_buff *skb, struct net_device *dev) if (!skb) return; if (lp->p_encap == ISDN_NET_ENCAP_ETHER) { - int pullsize = (ulong)skb->nh.raw - (ulong)skb->data - ETH_HLEN; + const int pullsize = skb_network_offset(skb) - ETH_HLEN; if (pullsize > 0) { printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize); skb_pull(skb, pullsize); @@ -1366,7 +1367,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); @@ -1786,7 +1787,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) } skb->dev = ndev; skb->pkt_type = PACKET_HOST; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); #ifdef ISDN_DEBUG_NET_DUMP isdn_dumppkt("R:", skb->data, skb->len, 40); #endif diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 1b2df80c3bc..387392cb3d6 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -1100,7 +1100,8 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff goto drop_packet; } skb_put(skb, skb_old->len + 128); - memcpy(skb->data, skb_old->data, skb_old->len); + skb_copy_from_linear_data(skb_old, skb->data, + skb_old->len); if (net_dev->local->ppp_slot < 0) { printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n", __FUNCTION__, net_dev->local->ppp_slot); @@ -1167,7 +1168,7 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff mlp->huptimer = 0; #endif /* CONFIG_IPPP_FILTER */ skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); netif_rx(skb); /* net_dev->local->stats.rx_packets++; done in isdn_net.c */ return; @@ -1902,7 +1903,9 @@ void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp, while( from != to ) { unsigned int len = from->len - MP_HEADER_LEN; - memcpy(skb_put(skb,len), from->data+MP_HEADER_LEN, len); + skb_copy_from_linear_data_offset(from, MP_HEADER_LEN, + skb_put(skb,len), + len); frag = from->next; isdn_ppp_mp_free_skb(mp, from); from = frag; diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c index e3add27dd0e..e93ad59f60b 100644 --- a/drivers/isdn/isdnloop/isdnloop.c +++ b/drivers/isdn/isdnloop/isdnloop.c @@ -415,7 +415,8 @@ isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card * card) spin_lock_irqsave(&card->isdnloop_lock, 
flags); nskb = dev_alloc_skb(skb->len); if (nskb) { - memcpy(skb_put(nskb, len), skb->data, len); + skb_copy_from_linear_data(skb, + skb_put(nskb, len), len); skb_queue_tail(&card->bqueue[channel], nskb); dev_kfree_skb(skb); } else diff --git a/drivers/isdn/pcbit/capi.c b/drivers/isdn/pcbit/capi.c index 47c59e95898..7b55e151f1b 100644 --- a/drivers/isdn/pcbit/capi.c +++ b/drivers/isdn/pcbit/capi.c @@ -429,8 +429,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan, if (!(info->data.setup.CallingPN = kmalloc(len - count + 1, GFP_ATOMIC))) return -1; - memcpy(info->data.setup.CallingPN, skb->data + count + 1, - len - count); + skb_copy_from_linear_data_offset(skb, count + 1, + info->data.setup.CallingPN, + len - count); info->data.setup.CallingPN[len - count] = 0; } @@ -457,8 +458,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan, if (!(info->data.setup.CalledPN = kmalloc(len - count + 1, GFP_ATOMIC))) return -1; - memcpy(info->data.setup.CalledPN, skb->data + count + 1, - len - count); + skb_copy_from_linear_data_offset(skb, count + 1, + info->data.setup.CalledPN, + len - count); info->data.setup.CalledPN[len - count] = 0; } @@ -539,7 +541,7 @@ int capi_decode_conn_actv_ind(struct pcbit_chan * chan, struct sk_buff *skb) #ifdef DEBUG if (len > 1 && len < 31) { - memcpy(str, skb->data + 2, len - 1); + skb_copy_from_linear_data_offset(skb, 2, str, len - 1); str[len] = 0; printk(KERN_DEBUG "Connected Party Number: %s\n", str); } diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c index 76e9c36597e..6a5ab409c4e 100644 --- a/drivers/media/dvb/dvb-core/dvb_net.c +++ b/drivers/media/dvb/dvb-core/dvb_net.c @@ -174,7 +174,7 @@ static unsigned short dvb_net_eth_type_trans(struct sk_buff *skb, struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); skb_pull(skb,dev->hard_header_len); eth = eth_hdr(skb); @@ -600,6 +600,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) /* Check CRC32, we've got it in our skb already. */ unsigned short ulen = htons(priv->ule_sndu_len); unsigned short utype = htons(priv->ule_sndu_type); + const u8 *tail; struct kvec iov[3] = { { &ulen, sizeof ulen }, { &utype, sizeof utype }, @@ -613,10 +614,11 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } ule_crc = iov_crc32(ule_crc, iov, 3); - expected_crc = *((u8 *)priv->ule_skb->tail - 4) << 24 | - *((u8 *)priv->ule_skb->tail - 3) << 16 | - *((u8 *)priv->ule_skb->tail - 2) << 8 | - *((u8 *)priv->ule_skb->tail - 1); + tail = skb_tail_pointer(priv->ule_skb); + expected_crc = *(tail - 4) << 24 | + *(tail - 3) << 16 | + *(tail - 2) << 8 | + *(tail - 1); if (ule_crc != expected_crc) { printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n", priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? 
*(unsigned short *)from_where : 0); @@ -695,7 +697,9 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len ) } else { - memcpy(dest_addr, priv->ule_skb->data, ETH_ALEN); + skb_copy_from_linear_data(priv->ule_skb, + dest_addr, + ETH_ALEN); skb_pull(priv->ule_skb, ETH_ALEN); } } diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index b691292ff59..7dd34bd28ef 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -714,6 +714,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) LANSendRequest_t *pSendReq; SGETransaction32_t *pTrans; SGESimple64_t *pSimple; + const unsigned char *mac; dma_addr_t dma; unsigned long flags; int ctx; @@ -753,7 +754,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) /* Set the mac.raw pointer, since this apparently isn't getting * done before we get the skb. Pull the data pointer past the mac data. */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, 12); dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len, @@ -784,6 +785,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) // IOC_AND_NETDEV_NAMES_s_s(dev), // ctx, skb, skb->data)); + mac = skb_mac_header(skb); #ifdef QLOGIC_NAA_WORKAROUND { struct NAA_Hosed *nh; @@ -793,12 +795,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) drops. */ read_lock_irq(&bad_naa_lock); for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) { - if ((nh->ieee[0] == skb->mac.raw[0]) && - (nh->ieee[1] == skb->mac.raw[1]) && - (nh->ieee[2] == skb->mac.raw[2]) && - (nh->ieee[3] == skb->mac.raw[3]) && - (nh->ieee[4] == skb->mac.raw[4]) && - (nh->ieee[5] == skb->mac.raw[5])) { + if ((nh->ieee[0] == mac[0]) && + (nh->ieee[1] == mac[1]) && + (nh->ieee[2] == mac[2]) && + (nh->ieee[3] == mac[3]) && + (nh->ieee[4] == mac[4]) && + (nh->ieee[5] == mac[5])) { cur_naa = nh->NAA; dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value " "= %04x.\n", cur_naa)); @@ -810,12 +812,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) #endif pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa << 16) | - (skb->mac.raw[0] << 8) | - (skb->mac.raw[1] << 0)); - pTrans->TransactionDetails[1] = cpu_to_le32((skb->mac.raw[2] << 24) | - (skb->mac.raw[3] << 16) | - (skb->mac.raw[4] << 8) | - (skb->mac.raw[5] << 0)); + (mac[0] << 8) | + (mac[1] << 0)); + pTrans->TransactionDetails[1] = cpu_to_le32((mac[2] << 24) | + (mac[3] << 16) | + (mac[4] << 8) | + (mac[5] << 0)); pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2]; @@ -930,7 +932,7 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg) pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); - memcpy(skb_put(skb, len), old_skb->data, len); + skb_copy_from_linear_data(old_skb, skb_put(skb, len), len); pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); @@ -1091,7 +1093,7 @@ mpt_lan_receive_post_reply(struct net_device *dev, priv->RcvCtl[ctx].dma, priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); - memcpy(skb_put(skb, l), old_skb->data, l); + skb_copy_from_linear_data(old_skb, skb_put(skb, l), l); pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, @@ -1120,7 +1122,7 @@ mpt_lan_receive_post_reply(struct net_device *dev, priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); - memcpy(skb_put(skb, len), old_skb->data, len); + skb_copy_from_linear_data(old_skb, skb_put(skb, len), len); 
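/* Aside, not part of the patch: the skb_copy_from_linear_data*() helpers
 * that replace raw memcpy() calls throughout this series are thin wrappers
 * over memcpy() on the skb linear area -- approximately the following
 * (see <linux/skbuff.h> in this kernel for the authoritative definitions): */
static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
					     void *to, const unsigned int len)
{
	memcpy(to, skb->data, len);		/* copy out of the linear area */
}

static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
						    const int offset, void *to,
						    const unsigned int len)
{
	memcpy(to, skb->data + offset, len);	/* same, starting at an offset */
}

static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
						  const int offset,
						  const void *from,
						  const unsigned int len)
{
	memcpy(skb->data + offset, from, len);	/* copy into the linear area */
}
/* The gain is that every copy touching skb->data is now greppable and easy
 * to audit when the skb layout (e.g. paged data) is in question. */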
pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, @@ -1549,7 +1551,7 @@ mpt_lan_type_trans(struct sk_buff *skb, struct net_device *dev) struct mpt_lan_ohdr *fch = (struct mpt_lan_ohdr *)skb->data; struct fcllc *fcllc; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, sizeof(struct mpt_lan_ohdr)); if (fch->dtype == htons(0xffff)) { diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index fedf9b7eae5..c1b47db29bd 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -287,4 +287,6 @@ source "drivers/mtd/nand/Kconfig" source "drivers/mtd/onenand/Kconfig" +source "drivers/mtd/ubi/Kconfig" + endif # MTD diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index c130e6261ad..92055405cb3 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -28,3 +28,5 @@ nftl-objs := nftlcore.o nftlmount.o inftl-objs := inftlcore.o inftlmount.o obj-y += chips/ maps/ devices/ nand/ onenand/ + +obj-$(CONFIG_MTD_UBI) += ubi/ diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c index 4db2055cee3..001af7f7ddd 100644 --- a/drivers/mtd/maps/sun_uflash.c +++ b/drivers/mtd/maps/sun_uflash.c @@ -39,7 +39,7 @@ MODULE_VERSION("2.0"); static LIST_HEAD(device_list); struct uflash_dev { - char *name; /* device name */ + const char *name; /* device name */ struct map_info map; /* mtd map info */ struct mtd_info *mtd; /* mtd info */ }; @@ -80,7 +80,7 @@ int uflash_devinit(struct linux_ebus_device *edev, struct device_node *dp) up->name = of_get_property(dp, "model", NULL); if (up->name && 0 < strlen(up->name)) - up->map.name = up->name; + up->map.name = (char *)up->name; up->map.phys = res->start; diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig new file mode 100644 index 00000000000..b9daf159a4a --- /dev/null +++ b/drivers/mtd/ubi/Kconfig @@ -0,0 +1,58 @@ +# drivers/mtd/ubi/Kconfig + +menu "UBI - Unsorted block images" + depends on MTD + +config MTD_UBI + tristate "Enable UBI" + depends on MTD + select CRC32 + help + UBI is a software layer above the MTD layer which provides LVM-like + logical volumes on top of MTD devices, hides some complexities of + flash chips like wear and bad blocks and provides some other useful + capabilities. Please consult the MTD web site for more details + (www.linux-mtd.infradead.org). + +config MTD_UBI_WL_THRESHOLD + int "UBI wear-leveling threshold" + default 4096 + range 2 65536 + depends on MTD_UBI + help + This parameter defines the maximum difference between the highest + erase counter value and the lowest erase counter value of eraseblocks + of UBI devices. When this threshold is exceeded, UBI starts performing + wear leveling by moving data from eraseblocks with low erase + counters to eraseblocks with high erase counters. Leave the default + value if unsure. + +config MTD_UBI_BEB_RESERVE + int "Percentage of reserved eraseblocks for bad eraseblocks handling" + default 1 + range 0 25 + depends on MTD_UBI + help + If the MTD device may have bad eraseblocks (e.g. NAND flash), UBI + reserves some amount of physical eraseblocks to handle new bad + eraseblocks. For example, if a flash physical eraseblock becomes bad, + UBI uses these reserved physical eraseblocks to relocate the bad one. + This option specifies how many physical eraseblocks will be reserved + for bad eraseblock handling (as a percentage of the total number of + good flash eraseblocks). If the underlying flash cannot have bad + eraseblocks (e.g. NOR flash), this value is ignored and nothing is + reserved.
Leave the default value if unsure. + +config MTD_UBI_GLUEBI + bool "Emulate MTD devices" + default n + depends on MTD_UBI + help + This option enables MTD device emulation on top of UBI volumes: for + each UBI volume an MTD device is created, and all I/O to this MTD + device is redirected to the UBI volume. This is handy to make + MTD-oriented software (like JFFS2) work on top of UBI. Do not enable + this if no legacy software will be used. + +source "drivers/mtd/ubi/Kconfig.debug" +endmenu diff --git a/drivers/mtd/ubi/Kconfig.debug b/drivers/mtd/ubi/Kconfig.debug new file mode 100644 index 00000000000..1e2ee22edef --- /dev/null +++ b/drivers/mtd/ubi/Kconfig.debug @@ -0,0 +1,104 @@ +comment "UBI debugging options" + depends on MTD_UBI + +config MTD_UBI_DEBUG + bool "UBI debugging" + depends on SYSFS + depends on MTD_UBI + select DEBUG_FS + select KALLSYMS_ALL + help + This option enables UBI debugging. + +config MTD_UBI_DEBUG_MSG + bool "UBI debugging messages" + depends on MTD_UBI_DEBUG + default n + help + This option enables UBI debugging messages. + +config MTD_UBI_DEBUG_PARANOID + bool "Extra self-checks" + default n + depends on MTD_UBI_DEBUG + help + This option enables extra checks in UBI code. Note this slows UBI down + significantly. + +config MTD_UBI_DEBUG_DISABLE_BGT + bool "Do not enable the UBI background thread" + depends on MTD_UBI_DEBUG + default n + help + This option switches the background thread off by default. The thread + may also be enabled/disabled via UBI sysfs. + +config MTD_UBI_DEBUG_USERSPACE_IO + bool "Direct user-space write/erase support" + default n + depends on MTD_UBI_DEBUG + help + By default, users cannot directly write and erase individual + eraseblocks of dynamic volumes, and have to use the update operation + instead. This option enables this capability - it is very useful for + debugging and testing. + +config MTD_UBI_DEBUG_EMULATE_BITFLIPS + bool "Emulate flash bit-flips" + depends on MTD_UBI_DEBUG + default n + help + This option emulates bit-flips with probability 1/50, which in turn + causes scrubbing. Useful for debugging and stressing UBI. + +config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES + bool "Emulate flash write failures" + depends on MTD_UBI_DEBUG + default n + help + This option emulates write failures with probability 1/100. Useful for + debugging and testing how UBI handles errors. + +config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES + bool "Emulate flash erase failures" + depends on MTD_UBI_DEBUG + default n + help + This option emulates erase failures with probability 1/100. Useful for + debugging and testing how UBI handles errors. + +menu "Additional UBI debugging messages" + depends on MTD_UBI_DEBUG + +config MTD_UBI_DEBUG_MSG_BLD + bool "Additional UBI initialization and build messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables detailed UBI initialization and device build + debugging messages. + +config MTD_UBI_DEBUG_MSG_EBA + bool "Eraseblock association unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI eraseblock + association unit. + +config MTD_UBI_DEBUG_MSG_WL + bool "Wear-leveling unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI wear-leveling + unit. + +config MTD_UBI_DEBUG_MSG_IO + bool "Input/output unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI input/output unit.
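[Aside, not part of the patch: a 1-in-N emulation knob like MTD_UBI_DEBUG_EMULATE_BITFLIPS above is typically implemented with the kernel PRNG. A hypothetical sketch; UBI's actual helper lives in its debug code and may differ, and the function name here is invented:]

#include <linux/random.h>

/* Hypothetical: nonzero roughly once per 50 calls, matching the 1/50
 * bit-flip probability the option advertises; the write/erase failure
 * knobs would use % 100 instead. */
static inline int ubi_dbg_emulate_bitflip(void)
{
	return !(random32() % 50);
}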
+ +endmenu # UBI debugging messages diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile new file mode 100644 index 00000000000..dd834e04151 --- /dev/null +++ b/drivers/mtd/ubi/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_MTD_UBI) += ubi.o + +ubi-y += vtbl.o vmt.o upd.o build.o cdev.o kapi.o eba.o io.o wl.o scan.o +ubi-y += misc.o + +ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o +ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c new file mode 100644 index 00000000000..555d594d181 --- /dev/null +++ b/drivers/mtd/ubi/build.c @@ -0,0 +1,848 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), + * Frank Haverkamp + */ + +/* + * This file includes UBI initialization and building of UBI devices. At the + * moment UBI devices may only be added while UBI is initialized, but dynamic + * device add/remove functionality is planned. Also, at the moment we only + * attach UBI devices by scanning, which will become a bottleneck when flashes + * reach certain large size. Then one may improve UBI and add other methods. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/stringify.h> +#include <linux/stat.h> +#include "ubi.h" + +/* Maximum length of the 'mtd=' parameter */ +#define MTD_PARAM_LEN_MAX 64 + +/** + * struct mtd_dev_param - MTD device parameter description data structure. 
+ * @name: MTD device name or number string + * @vid_hdr_offs: VID header offset + * @data_offs: data offset + */ +struct mtd_dev_param +{ + char name[MTD_PARAM_LEN_MAX]; + int vid_hdr_offs; + int data_offs; +}; + +/* Numbers of elements set in the @mtd_dev_param array */ +static int mtd_devs = 0; + +/* MTD devices specification parameters */ +static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; + +/* Number of UBI devices in system */ +int ubi_devices_cnt; + +/* All UBI devices in system */ +struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; + +/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ +struct class *ubi_class; + +/* "Show" method for files in '/<sysfs>/class/ubi/' */ +static ssize_t ubi_version_show(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", UBI_VERSION); +} + +/* UBI version attribute ('/<sysfs>/class/ubi/version') */ +static struct class_attribute ubi_version = + __ATTR(version, S_IRUGO, ubi_version_show, NULL); + +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */ +static struct device_attribute dev_eraseblock_size = + __ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_avail_eraseblocks = + __ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_total_eraseblocks = + __ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_volumes_count = + __ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_ec = + __ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_reserved_for_bad = + __ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bad_peb_count = + __ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_vol_count = + __ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_min_io_size = + __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bgt_enabled = + __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); + +/* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct ubi_device *ubi; + + ubi = container_of(dev, struct ubi_device, dev); + if (attr == &dev_eraseblock_size) + return sprintf(buf, "%d\n", ubi->leb_size); + else if (attr == &dev_avail_eraseblocks) + return sprintf(buf, "%d\n", ubi->avail_pebs); + else if (attr == &dev_total_eraseblocks) + return sprintf(buf, "%d\n", ubi->good_peb_count); + else if (attr == &dev_volumes_count) + return sprintf(buf, "%d\n", ubi->vol_count); + else if (attr == &dev_max_ec) + return sprintf(buf, "%d\n", ubi->max_ec); + else if (attr == &dev_reserved_for_bad) + return sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); + else if (attr == &dev_bad_peb_count) + return sprintf(buf, "%d\n", ubi->bad_peb_count); + else if (attr == &dev_max_vol_count) + return sprintf(buf, "%d\n", ubi->vtbl_slots); + else if (attr == &dev_min_io_size) + return sprintf(buf, "%d\n", ubi->min_io_size); + else if (attr == &dev_bgt_enabled) + return sprintf(buf, "%d\n", ubi->thread_enabled); + else + BUG(); + + return 0; +} + +/* Fake "release" method for UBI devices */ +static void dev_release(struct device *dev) { } + +/** + 
* ubi_sysfs_init - initialize sysfs for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int ubi_sysfs_init(struct ubi_device *ubi) +{ + int err; + + ubi->dev.release = dev_release; + ubi->dev.devt = MKDEV(ubi->major, 0); + ubi->dev.class = ubi_class; + sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); + err = device_register(&ubi->dev); + if (err) + goto out; + + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) + goto out_unregister; + err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); + if (err) + goto out_eraseblock_size; + err = device_create_file(&ubi->dev, &dev_total_eraseblocks); + if (err) + goto out_avail_eraseblocks; + err = device_create_file(&ubi->dev, &dev_volumes_count); + if (err) + goto out_total_eraseblocks; + err = device_create_file(&ubi->dev, &dev_max_ec); + if (err) + goto out_volumes_count; + err = device_create_file(&ubi->dev, &dev_reserved_for_bad); + if (err) + goto out_volumes_max_ec; + err = device_create_file(&ubi->dev, &dev_bad_peb_count); + if (err) + goto out_reserved_for_bad; + err = device_create_file(&ubi->dev, &dev_max_vol_count); + if (err) + goto out_bad_peb_count; + err = device_create_file(&ubi->dev, &dev_min_io_size); + if (err) + goto out_max_vol_count; + err = device_create_file(&ubi->dev, &dev_bgt_enabled); + if (err) + goto out_min_io_size; + + return 0; + +out_min_io_size: + device_remove_file(&ubi->dev, &dev_min_io_size); +out_max_vol_count: + device_remove_file(&ubi->dev, &dev_max_vol_count); +out_bad_peb_count: + device_remove_file(&ubi->dev, &dev_bad_peb_count); +out_reserved_for_bad: + device_remove_file(&ubi->dev, &dev_reserved_for_bad); +out_volumes_max_ec: + device_remove_file(&ubi->dev, &dev_max_ec); +out_volumes_count: + device_remove_file(&ubi->dev, &dev_volumes_count); +out_total_eraseblocks: + device_remove_file(&ubi->dev, &dev_total_eraseblocks); +out_avail_eraseblocks: + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); +out_eraseblock_size: + device_remove_file(&ubi->dev, &dev_eraseblock_size); +out_unregister: + device_unregister(&ubi->dev); +out: + ubi_err("failed to initialize sysfs for %s", ubi->ubi_name); + return err; +} + +/** + * ubi_sysfs_close - close sysfs for an UBI device. + * @ubi: UBI device description object + */ +static void ubi_sysfs_close(struct ubi_device *ubi) +{ + device_remove_file(&ubi->dev, &dev_bgt_enabled); + device_remove_file(&ubi->dev, &dev_min_io_size); + device_remove_file(&ubi->dev, &dev_max_vol_count); + device_remove_file(&ubi->dev, &dev_bad_peb_count); + device_remove_file(&ubi->dev, &dev_reserved_for_bad); + device_remove_file(&ubi->dev, &dev_max_ec); + device_remove_file(&ubi->dev, &dev_volumes_count); + device_remove_file(&ubi->dev, &dev_total_eraseblocks); + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); + device_remove_file(&ubi->dev, &dev_eraseblock_size); + device_unregister(&ubi->dev); +} + +/** + * kill_volumes - destroy all volumes. + * @ubi: UBI device description object + */ +static void kill_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) + ubi_free_volume(ubi, i); +} + +/** + * uif_init - initialize user interfaces for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
+ */ +static int uif_init(struct ubi_device *ubi) +{ + int i, err; + dev_t dev; + + mutex_init(&ubi->vtbl_mutex); + spin_lock_init(&ubi->volumes_lock); + + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* + * Major numbers for the UBI character devices are allocated + * dynamically. Major numbers of volume character devices are + * equivalent to ones of the corresponding UBI character device. Minor + * numbers of UBI character devices are 0, while minor numbers of + * volume character devices start from 1. Thus, we allocate one major + * number and ubi->vtbl_slots + 1 minor numbers. + */ + err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name); + if (err) { + ubi_err("cannot register UBI character devices"); + return err; + } + + cdev_init(&ubi->cdev, &ubi_cdev_operations); + ubi->major = MAJOR(dev); + dbg_msg("%s major is %u", ubi->ubi_name, ubi->major); + ubi->cdev.owner = THIS_MODULE; + + dev = MKDEV(ubi->major, 0); + err = cdev_add(&ubi->cdev, dev, 1); + if (err) { + ubi_err("cannot add character device %s", ubi->ubi_name); + goto out_unreg; + } + + err = ubi_sysfs_init(ubi); + if (err) + goto out_cdev; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { + err = ubi_add_volume(ubi, i); + if (err) + goto out_volumes; + } + + return 0; + +out_volumes: + kill_volumes(ubi); + ubi_sysfs_close(ubi); +out_cdev: + cdev_del(&ubi->cdev); +out_unreg: + unregister_chrdev_region(MKDEV(ubi->major, 0), + ubi->vtbl_slots + 1); + return err; +} + +/** + * uif_close - close user interfaces for an UBI device. + * @ubi: UBI device description object + */ +static void uif_close(struct ubi_device *ubi) +{ + kill_volumes(ubi); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); + unregister_chrdev_region(MKDEV(ubi->major, 0), ubi->vtbl_slots + 1); +} + +/** + * attach_by_scanning - attach an MTD device using scanning method. + * @ubi: UBI device descriptor + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, currently this is the only method to attach UBI devices. Hopefully in + * the future we'll have more scalable attaching methods and avoid full media + * scanning. But even in this case scanning will be needed as a fall-back + * attaching method if there are some on-flash table corruptions. + */ +static int attach_by_scanning(struct ubi_device *ubi) +{ + int err; + struct ubi_scan_info *si; + + si = ubi_scan(ubi); + if (IS_ERR(si)) + return PTR_ERR(si); + + ubi->bad_peb_count = si->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->max_ec = si->max_ec; + ubi->mean_ec = si->mean_ec; + + err = ubi_read_volume_table(ubi, si); + if (err) + goto out_si; + + err = ubi_wl_init_scan(ubi, si); + if (err) + goto out_vtbl; + + err = ubi_eba_init_scan(ubi, si); + if (err) + goto out_wl; + + ubi_scan_destroy_si(si); + return 0; + +out_wl: + ubi_wl_close(ubi); +out_vtbl: + kfree(ubi->vtbl); +out_si: + ubi_scan_destroy_si(si); + return err; +} + +/** + * io_init - initialize I/O unit for a given UBI device. 
+ * @ubi: UBI device description object + * + * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are + * assumed: + * o EC header is always at offset zero - this cannot be changed; + * o VID header starts just after the EC header at the closest address + * aligned to @io->hdrs_min_io_size; + * o data starts just after the VID header at the closest address aligned to + * @io->min_io_size. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int io_init(struct ubi_device *ubi) +{ + if (ubi->mtd->numeraseregions != 0) { + /* + * Some flashes have several erase regions. Different regions + * may have different eraseblock sizes and other + * characteristics. It looks like mostly multi-region flashes + * have one "main" region and one or more small regions to + * store boot loader code or boot parameters or whatever. I + * guess we should just pick the largest region. But this is + * not implemented. + */ + ubi_err("multiple regions, not implemented"); + return -EINVAL; + } + + /* + * Note, in this implementation we support MTD devices with 0x7FFFFFFF + * physical eraseblocks maximum. + */ + + ubi->peb_size = ubi->mtd->erasesize; + ubi->peb_count = ubi->mtd->size / ubi->mtd->erasesize; + ubi->flash_size = ubi->mtd->size; + + if (ubi->mtd->block_isbad && ubi->mtd->block_markbad) + ubi->bad_allowed = 1; + + ubi->min_io_size = ubi->mtd->writesize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; + + /* Make sure the minimal I/O unit is a power of 2 */ + if (ubi->min_io_size == 0 || + (ubi->min_io_size & (ubi->min_io_size - 1))) { + ubi_err("bad min. I/O unit"); + return -EINVAL; + } + + ubi_assert(ubi->hdrs_min_io_size > 0); + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_msg("min_io_size %d", ubi->min_io_size); + dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); + + if (ubi->vid_hdr_offset == 0) + /* Default offset */ + ubi->vid_hdr_offset = ubi->vid_hdr_aloffset = + ubi->ec_hdr_alsize; + else { + ubi->vid_hdr_aloffset = ubi->vid_hdr_offset & + ~(ubi->hdrs_min_io_size - 1); + ubi->vid_hdr_shift = ubi->vid_hdr_offset - + ubi->vid_hdr_aloffset; + } + + /* Similar for the data offset */ + if (ubi->leb_start == 0) { + ubi->leb_start = ubi->vid_hdr_offset + ubi->vid_hdr_alsize; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + } + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); + dbg_msg("vid_hdr_shift %d", ubi->vid_hdr_shift); + dbg_msg("leb_start %d", ubi->leb_start); + + /* The shift must be aligned to a 32-bit boundary */ + if (ubi->vid_hdr_shift % 4) { + ubi_err("unaligned VID header shift %d", + ubi->vid_hdr_shift); + return -EINVAL; + } + + /* Check sanity */ + if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || + ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || + ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || + ubi->leb_start % ubi->min_io_size) { + ubi_err("bad VID header (%d) or data offsets (%d)", + ubi->vid_hdr_offset, ubi->leb_start); + return -EINVAL; + } + + /* + * It may happen that EC and VID headers are situated in one 
minimal + * I/O unit. In this case we can only accept this UBI image in + * read-only mode. + */ + if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { + ubi_warn("EC and VID headers are in the same minimal I/O unit, " + "switch to read-only mode"); + ubi->ro_mode = 1; + } + + ubi->leb_size = ubi->peb_size - ubi->leb_start; + + if (!(ubi->mtd->flags & MTD_WRITEABLE)) { + ubi_msg("MTD device %d is write-protected, attach in " + "read-only mode", ubi->mtd->index); + ubi->ro_mode = 1; + } + + dbg_msg("leb_size %d", ubi->leb_size); + dbg_msg("ro_mode %d", ubi->ro_mode); + + /* + * Note, ideally, we have to initialize ubi->bad_peb_count here. But + * unfortunately, MTD does not provide this information. We should loop + * over all physical eraseblocks and invoke mtd->block_is_bad() for + * each physical eraseblock. So, we skip ubi->bad_peb_count + * uninitialized and initialize it after scanning. + */ + + return 0; +} + +/** + * attach_mtd_dev - attach an MTD device. + * @mtd_dev: MTD device name or number string + * @vid_hdr_offset: VID header offset + * @data_offset: data offset + * + * This function attaches an MTD device to UBI. It first treats @mtd_dev as the + * MTD device name, and tries to open it by this name. If it is unable to open, + * it tries to convert @mtd_dev to an integer and open the MTD device by its + * number. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, + int data_offset) +{ + struct ubi_device *ubi; + struct mtd_info *mtd; + int i, err; + + mtd = get_mtd_device_nm(mtd_dev); + if (IS_ERR(mtd)) { + int mtd_num; + char *endp; + + if (PTR_ERR(mtd) != -ENODEV) + return PTR_ERR(mtd); + + /* + * Probably this is not MTD device name but MTD device number - + * check this out. 
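
The default-offset rules io_init() applies above reduce to two ALIGN() computations. A stand-alone sketch with an assumed NAND geometry (2048-byte pages, 512-byte sub-pages, 64-byte EC/VID headers, 128 KiB physical eraseblocks; all numbers illustrative):

/* Worked example of the io_init() default layout. */
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	int min_io_size = 2048;		/* full NAND page */
	int hdrs_min_io_size = 512;	/* sub-page */
	int ec_hdr_alsize = ALIGN(64, hdrs_min_io_size);	/* 512 */
	int vid_hdr_offset = ec_hdr_alsize;			/* 512 */
	int leb_start = ALIGN(vid_hdr_offset + ALIGN(64, hdrs_min_io_size),
			      min_io_size);			/* 2048 */

	printf("VID header at %d, data (LEB) starts at %d\n",
	       vid_hdr_offset, leb_start);
	printf("LEB size on a 128 KiB PEB: %d bytes\n",
	       128 * 1024 - leb_start);
	return 0;
}
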
+ */ + mtd_num = simple_strtoul(mtd_dev, &endp, 0); + if (*endp != '\0' || mtd_dev == endp) { + ubi_err("incorrect MTD device: \"%s\"", mtd_dev); + return -ENODEV; + } + + mtd = get_mtd_device(NULL, mtd_num); + if (IS_ERR(mtd)) + return PTR_ERR(mtd); + } + + /* Check if we already have the same MTD device attached */ + for (i = 0; i < ubi_devices_cnt; i++) + if (ubi_devices[i]->mtd->index == mtd->index) { + ubi_err("mtd%d is already attached to ubi%d", + mtd->index, i); + err = -EINVAL; + goto out_mtd; + } + + ubi = ubi_devices[ubi_devices_cnt] = kzalloc(sizeof(struct ubi_device), + GFP_KERNEL); + if (!ubi) { + err = -ENOMEM; + goto out_mtd; + } + + ubi->ubi_num = ubi_devices_cnt; + ubi->mtd = mtd; + + dbg_msg("attaching mtd%d to ubi%d: VID header offset %d data offset %d", + ubi->mtd->index, ubi_devices_cnt, vid_hdr_offset, data_offset); + + ubi->vid_hdr_offset = vid_hdr_offset; + ubi->leb_start = data_offset; + err = io_init(ubi); + if (err) + goto out_free; + + err = attach_by_scanning(ubi); + if (err) { + dbg_err("failed to attach by scanning, error %d", err); + goto out_free; + } + + err = uif_init(ubi); + if (err) + goto out_detach; + + ubi_devices_cnt += 1; + + ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt); + ubi_msg("MTD device name: \"%s\"", ubi->mtd->name); + ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); + ubi_msg("physical eraseblock size: %d bytes (%d KiB)", + ubi->peb_size, ubi->peb_size >> 10); + ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); + ubi_msg("number of good PEBs: %d", ubi->good_peb_count); + ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); + ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); + ubi_msg("VID header offset: %d (aligned %d)", + ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); + ubi_msg("data offset: %d", ubi->leb_start); + ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); + ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); + ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); + ubi_msg("number of user volumes: %d", + ubi->vol_count - UBI_INT_VOL_COUNT); + ubi_msg("available PEBs: %d", ubi->avail_pebs); + ubi_msg("total number of reserved PEBs: %d", ubi->rsvd_pebs); + ubi_msg("number of PEBs reserved for bad PEB handling: %d", + ubi->beb_rsvd_pebs); + ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); + + /* Enable the background thread */ + if (!DBG_DISABLE_BGT) { + ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + } + + return 0; + +out_detach: + ubi_eba_close(ubi); + ubi_wl_close(ubi); + kfree(ubi->vtbl); +out_free: + kfree(ubi); +out_mtd: + put_mtd_device(mtd); + ubi_devices[ubi_devices_cnt] = NULL; + return err; +} + +/** + * detach_mtd_dev - detach an MTD device. 
+ * @ubi: UBI device description object + */ +static void detach_mtd_dev(struct ubi_device *ubi) +{ + int ubi_num = ubi->ubi_num, mtd_num = ubi->mtd->index; + + dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); + uif_close(ubi); + ubi_eba_close(ubi); + ubi_wl_close(ubi); + kfree(ubi->vtbl); + put_mtd_device(ubi->mtd); + kfree(ubi_devices[ubi_num]); + ubi_devices[ubi_num] = NULL; + ubi_devices_cnt -= 1; + ubi_assert(ubi_devices_cnt >= 0); + ubi_msg("mtd%d is detached from ubi%d", mtd_num, ubi_num); +} + +static int __init ubi_init(void) +{ + int err, i, k; + + /* Ensure that EC and VID headers have correct size */ + BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { + printk("UBI error: too many MTD devices, maximum is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); + if (IS_ERR(ubi_class)) + return PTR_ERR(ubi_class); + + err = class_create_file(ubi_class, &ubi_version); + if (err) + goto out_class; + + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; + + cond_resched(); + + if (!p->name) { + dbg_err("empty name"); + err = -EINVAL; + goto out_detach; + } + + err = attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs); + if (err) + goto out_detach; + } + + return 0; + +out_detach: + for (k = 0; k < i; k++) + detach_mtd_dev(ubi_devices[k]); + class_remove_file(ubi_class, &ubi_version); +out_class: + class_destroy(ubi_class); + return err; +} +module_init(ubi_init); + +static void __exit ubi_exit(void) +{ + int i, n = ubi_devices_cnt; + + for (i = 0; i < n; i++) + detach_mtd_dev(ubi_devices[i]); + class_remove_file(ubi_class, &ubi_version); + class_destroy(ubi_class); +} +module_exit(ubi_exit); + +/** + * bytes_str_to_int - convert a string representing number of bytes to an + * integer. + * @str: the string to convert + * + * This function returns positive resulting integer in case of success and a + * negative error code in case of failure. + */ +static int __init bytes_str_to_int(const char *str) +{ + char *endp; + unsigned long result; + + result = simple_strtoul(str, &endp, 0); + if (str == endp || result < 0) { + printk("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + switch (*endp) { + case 'G': + result *= 1024; + case 'M': + result *= 1024; + case 'K': + case 'k': + result *= 1024; + if (endp[1] == 'i' && (endp[2] == '\0' || + endp[2] == 'B' || endp[2] == 'b')) + endp += 2; + case '\0': + break; + default: + printk("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + return result; +} + +/** + * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. + * @val: the parameter value to parse + * @kp: not used + * + * This function returns zero in case of success and a negative error code in + * case of error. + */ +static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +{ + int i, len; + struct mtd_dev_param *p; + char buf[MTD_PARAM_LEN_MAX]; + char *pbuf = &buf[0]; + char *tokens[3] = {NULL, NULL, NULL}; + + if (mtd_devs == UBI_MAX_DEVICES) { + printk("UBI error: too many parameters, max. is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + len = strnlen(val, MTD_PARAM_LEN_MAX); + if (len == MTD_PARAM_LEN_MAX) { + printk("UBI error: parameter \"%s\" is too long, max. 
is %d\n", + val, MTD_PARAM_LEN_MAX); + return -EINVAL; + } + + if (len == 0) { + printk("UBI warning: empty 'mtd=' parameter - ignored\n"); + return 0; + } + + strcpy(buf, val); + + /* Get rid of the final newline */ + if (buf[len - 1] == '\n') + buf[len - 1] = 0; + + for (i = 0; i < 3; i++) + tokens[i] = strsep(&pbuf, ","); + + if (pbuf) { + printk("UBI error: too many arguments at \"%s\"\n", val); + return -EINVAL; + } + + if (tokens[0] == '\0') + return -EINVAL; + + p = &mtd_dev_param[mtd_devs]; + strcpy(&p->name[0], tokens[0]); + + if (tokens[1]) + p->vid_hdr_offs = bytes_str_to_int(tokens[1]); + if (tokens[2]) + p->data_offs = bytes_str_to_int(tokens[2]); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + if (p->data_offs < 0) + return p->data_offs; + + mtd_devs += 1; + return 0; +} + +module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " + "mtd=<name|num>[,<vid_hdr_offs>,<data_offs>]. " + "Multiple \"mtd\" parameters may be specified.\n" + "MTD devices may be specified by their number or name. " + "Optional \"vid_hdr_offs\" and \"data_offs\" parameters " + "specify UBI VID header position and data starting " + "position to be used by UBI.\n" + "Example: mtd=content,1984,2048 mtd=4 - attach MTD device" + "with name content using VID header offset 1984 and data " + "start 2048, and MTD device number 4 using default " + "offsets"); + +MODULE_VERSION(__stringify(UBI_VERSION)); +MODULE_DESCRIPTION("UBI - Unsorted Block Images"); +MODULE_AUTHOR("Artem Bityutskiy"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c new file mode 100644 index 00000000000..6612eb79bf1 --- /dev/null +++ b/drivers/mtd/ubi/cdev.c @@ -0,0 +1,722 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes implementation of UBI character device operations. + * + * There are two kinds of character devices in UBI: UBI character devices and + * UBI volume character devices. UBI character devices allow users to + * manipulate whole volumes: create, remove, and re-size them. Volume character + * devices provide volume I/O capabilities. + * + * Major and minor numbers are assigned dynamically to both UBI and volume + * character devices. + */ + +#include <linux/module.h> +#include <linux/stat.h> +#include <linux/ioctl.h> +#include <linux/capability.h> +#include <mtd/ubi-user.h> +#include <asm/uaccess.h> +#include <asm/div64.h> +#include "ubi.h" + +/* + * Maximum sequence numbers of UBI and volume character device IOCTLs (direct + * logical eraseblock erase is a debug-only feature). 
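
The size suffixes accepted by bytes_str_to_int() above rely on deliberate switch fall-through: 'G' multiplies by 1024 three times, 'M' twice, 'K'/'k' once, optionally followed by an "iB" tail. A user-space sketch of the same logic, with strtoul() standing in for simple_strtoul():

#include <stdio.h>
#include <stdlib.h>

static long bytes_str_to_int(const char *str)
{
	char *endp;
	unsigned long result = strtoul(str, &endp, 0);

	if (endp == str)
		return -1;

	switch (*endp) {
	case 'G':
		result *= 1024;	/* fall through */
	case 'M':
		result *= 1024;	/* fall through */
	case 'K':
	case 'k':
		result *= 1024;
		if (endp[1] == 'i' && (endp[2] == '\0' ||
				       endp[2] == 'B' || endp[2] == 'b'))
			endp += 2;
		/* fall through */
	case '\0':
		break;
	default:
		return -1;
	}
	return result;
}

int main(void)
{
	printf("%ld %ld %ld\n", bytes_str_to_int("1984"),
	       bytes_str_to_int("2KiB"), bytes_str_to_int("1M"));
	return 0;
}
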
+ */ +#define UBI_CDEV_IOC_MAX_SEQ 2 +#ifndef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO +#define VOL_CDEV_IOC_MAX_SEQ 1 +#else +#define VOL_CDEV_IOC_MAX_SEQ 2 +#endif + +/** + * major_to_device - get UBI device object by character device major number. + * @major: major number + * + * This function returns a pointer to the UBI device object. + */ +static struct ubi_device *major_to_device(int major) +{ + int i; + + for (i = 0; i < ubi_devices_cnt; i++) + if (ubi_devices[i] && ubi_devices[i]->major == major) + return ubi_devices[i]; + BUG(); +} + +/** + * get_exclusive - get exclusive access to an UBI volume. + * @desc: volume descriptor + * + * This function changes UBI volume open mode to "exclusive". Returns previous + * mode value (positive integer) in case of success and a negative error code + * in case of failure. + */ +static int get_exclusive(struct ubi_volume_desc *desc) +{ + int users, err; + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + users = vol->readers + vol->writers + vol->exclusive; + ubi_assert(users > 0); + if (users > 1) { + dbg_err("%d users for volume %d", users, vol->vol_id); + err = -EBUSY; + } else { + vol->readers = vol->writers = 0; + vol->exclusive = 1; + err = desc->mode; + desc->mode = UBI_EXCLUSIVE; + } + spin_unlock(&vol->ubi->volumes_lock); + + return err; +} + +/** + * revoke_exclusive - revoke exclusive mode. + * @desc: volume descriptor + * @mode: new mode to switch to + */ +static void revoke_exclusive(struct ubi_volume_desc *desc, int mode) +{ + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + ubi_assert(vol->readers == 0 && vol->writers == 0); + ubi_assert(vol->exclusive == 1 && desc->mode == UBI_EXCLUSIVE); + vol->exclusive = 0; + if (mode == UBI_READONLY) + vol->readers = 1; + else if (mode == UBI_READWRITE) + vol->writers = 1; + else + vol->exclusive = 1; + spin_unlock(&vol->ubi->volumes_lock); + + desc->mode = mode; +} + +static int vol_cdev_open(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc; + const struct ubi_device *ubi = major_to_device(imajor(inode)); + int vol_id = iminor(inode) - 1; + int mode; + + if (file->f_mode & FMODE_WRITE) + mode = UBI_READWRITE; + else + mode = UBI_READONLY; + + dbg_msg("open volume %d, mode %d", vol_id, mode); + + desc = ubi_open_volume(ubi->ubi_num, vol_id, mode); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + file->private_data = desc; + return 0; +} + +static int vol_cdev_release(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + + dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); + + if (vol->updating) { + ubi_warn("update of volume %d not finished, volume is damaged", + vol->vol_id); + vol->updating = 0; + kfree(vol->upd_buf); + } + + ubi_close_volume(desc); + return 0; +} + +static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + loff_t new_offset; + + if (vol->updating) { + /* Update is in progress, seeking is prohibited */ + dbg_err("updating"); + return -EBUSY; + } + + switch (origin) { + case 0: /* SEEK_SET */ + new_offset = offset; + break; + case 1: /* SEEK_CUR */ + new_offset = file->f_pos + offset; + break; + case 2: /* SEEK_END */ + new_offset = vol->used_bytes + offset; + break; + default: + return -EINVAL; + } + + if (new_offset < 0 || new_offset > vol->used_bytes) { + dbg_err("bad seek %lld", new_offset); 
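
Because vol_cdev_llseek() above treats SEEK_END as relative to the volume's used_bytes, user space can learn how much data a volume currently holds with a plain lseek(). A usage sketch; the device node name depends on the system and is an assumption:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/ubi0_0", O_RDONLY);
	off_t used;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	used = lseek(fd, 0, SEEK_END);	/* corresponds to vol->used_bytes */
	printf("volume holds %lld bytes of data\n", (long long)used);
	close(fd);
	return 0;
}
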
+ return -EINVAL; + } + + dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", + vol->vol_id, offset, origin, new_offset); + + file->f_pos = new_offset; + return new_offset; +} + +static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, + loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, lnum, off, len, vol_id = desc->vol->vol_id, tbuf_size; + size_t count_save = count; + void *tbuf; + uint64_t tmp; + + dbg_msg("read %zd bytes from offset %lld of volume %d", + count, *offp, vol_id); + + if (vol->updating) { + dbg_err("updating"); + return -EBUSY; + } + if (vol->upd_marker) { + dbg_err("damaged volume, update marker is set"); + return -EBADF; + } + if (*offp == vol->used_bytes || count == 0) + return 0; + + if (vol->corrupted) + dbg_msg("read from corrupted volume %d", vol_id); + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = kmalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; + + tmp = *offp; + off = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + do { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = ubi_eba_read_leb(ubi, vol_id, lnum, tbuf, off, len, 0); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + + err = copy_to_user(buf, tbuf, len); + if (err) { + err = -EFAULT; + break; + } + + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } while (count); + + kfree(tbuf); + return err ? err : count_save - count; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + +/* + * This function allows to directly write to dynamic UBI volumes, without + * issuing the volume update operation. Available only as a debugging feature. + * Very useful for testing UBI. + */ +static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int lnum, off, len, tbuf_size, vol_id = vol->vol_id, err = 0; + size_t count_save = count; + char *tbuf; + uint64_t tmp; + + dbg_msg("requested: write %zd bytes to offset %lld of volume %u", + count, *offp, desc->vol->vol_id); + + if (vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + tmp = *offp; + off = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + if (off % ubi->min_io_size) { + dbg_err("unaligned position"); + return -EINVAL; + } + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + /* We can write only in fractions of the minimum I/O unit */ + if (count % ubi->min_io_size) { + dbg_err("unaligned write length"); + return -EINVAL; + } + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = kmalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? 
tbuf_size : count; + + while (count) { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = copy_from_user(tbuf, buf, len); + if (err) { + err = -EFAULT; + break; + } + + err = ubi_eba_write_leb(ubi, vol_id, lnum, tbuf, off, len, + UBI_UNKNOWN); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } + + kfree(tbuf); + return err ? err : count_save - count; +} + +#else +#define vol_cdev_direct_write(file, buf, count, offp) -EPERM +#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ + +static ssize_t vol_cdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + if (!vol->updating) + return vol_cdev_direct_write(file, buf, count, offp); + + err = ubi_more_update_data(ubi, vol->vol_id, buf, count); + if (err < 0) { + ubi_err("cannot write %zd bytes of update data", count); + return err; + } + + if (err) { + /* + * Update is finished, @err contains number of actually written + * bytes now. + */ + count = err; + + err = ubi_check_volume(ubi, vol->vol_id); + if (err < 0) + return err; + + if (err) { + ubi_warn("volume %d on UBI device %d is corrupted", + vol->vol_id, ubi->ubi_num); + vol->corrupted = 1; + } + vol->checked = 1; + revoke_exclusive(desc, UBI_READWRITE); + } + + *offp += count; + return count; +} + +static int vol_cdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + void __user *argp = (void __user *)arg; + + if (_IOC_NR(cmd) > VOL_CDEV_IOC_MAX_SEQ || + _IOC_TYPE(cmd) != UBI_VOL_IOC_MAGIC) + return -ENOTTY; + + if (_IOC_DIR(cmd) && _IOC_READ) + err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); + else if (_IOC_DIR(cmd) && _IOC_WRITE) + err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); + if (err) + return -EFAULT; + + switch (cmd) { + + /* Volume update command */ + case UBI_IOCVOLUP: + { + int64_t bytes, rsvd_bytes; + + if (!capable(CAP_SYS_RESOURCE)) { + err = -EPERM; + break; + } + + err = copy_from_user(&bytes, argp, sizeof(int64_t)); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY) { + err = -EROFS; + break; + } + + rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); + if (bytes < 0 || bytes > rsvd_bytes) { + err = -EINVAL; + break; + } + + err = get_exclusive(desc); + if (err < 0) + break; + + err = ubi_start_update(ubi, vol->vol_id, bytes); + if (bytes == 0) + revoke_exclusive(desc, UBI_READWRITE); + + file->f_pos = 0; + break; + } + +#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + /* Logical eraseblock erasure command */ + case UBI_IOCEBER: + { + int32_t lnum; + + err = __get_user(lnum, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY) { + err = -EROFS; + break; + } + + if (lnum < 0 || lnum >= vol->reserved_pebs) { + err = -EINVAL; + break; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME) { + err = -EROFS; + break; + } + + dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum); + if (err) + break; + + err = ubi_wl_flush(ubi); + break; + } +#endif + + default: + err = -ENOTTY; 
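
Both vol_cdev_read() and vol_cdev_direct_write() above split the flat file position into a logical eraseblock number and an in-LEB offset with do_div(). Plain 64-bit division shows the same decomposition; the LEB size is illustrative:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t offp = 300000;			/* file position */
	uint32_t usable_leb_size = 129024;	/* example LEB size */
	uint32_t off = offp % usable_leb_size;	/* offset inside the LEB */
	uint64_t lnum = offp / usable_leb_size;	/* logical eraseblock */

	printf("offset %llu -> LEB %llu, in-LEB offset %u\n",
	       (unsigned long long)offp, (unsigned long long)lnum,
	       (unsigned)off);
	return 0;
}
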
+ break; + } + + return err; +} + +/** + * verify_mkvol_req - verify volume creation request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function zero if the request is correct, and %-EINVAL if not. + */ +static int verify_mkvol_req(const struct ubi_device *ubi, + const struct ubi_mkvol_req *req) +{ + int n, err = -EINVAL; + + if (req->bytes < 0 || req->alignment < 0 || req->vol_type < 0 || + req->name_len < 0) + goto bad; + + if ((req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) && + req->vol_id != UBI_VOL_NUM_AUTO) + goto bad; + + if (req->alignment == 0) + goto bad; + + if (req->bytes == 0) + goto bad; + + if (req->vol_type != UBI_DYNAMIC_VOLUME && + req->vol_type != UBI_STATIC_VOLUME) + goto bad; + + if (req->alignment > ubi->leb_size) + goto bad; + + n = req->alignment % ubi->min_io_size; + if (req->alignment != 1 && n) + goto bad; + + if (req->name_len > UBI_VOL_NAME_MAX) { + err = -ENAMETOOLONG; + goto bad; + } + + return 0; + +bad: + dbg_err("bad volume creation request"); + ubi_dbg_dump_mkvol_req(req); + return err; +} + +/** + * verify_rsvol_req - verify volume re-size request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function returns zero if the request is correct, and %-EINVAL if not. + */ +static int verify_rsvol_req(const struct ubi_device *ubi, + const struct ubi_rsvol_req *req) +{ + if (req->bytes <= 0) + return -EINVAL; + + if (req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) + return -EINVAL; + + return 0; +} + +static int ubi_cdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct ubi_device *ubi; + struct ubi_volume_desc *desc; + void __user *argp = (void __user *)arg; + + if (_IOC_NR(cmd) > UBI_CDEV_IOC_MAX_SEQ || + _IOC_TYPE(cmd) != UBI_IOC_MAGIC) + return -ENOTTY; + + if (_IOC_DIR(cmd) && _IOC_READ) + err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); + else if (_IOC_DIR(cmd) && _IOC_WRITE) + err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); + if (err) + return -EFAULT; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + ubi = major_to_device(imajor(inode)); + if (IS_ERR(ubi)) + return PTR_ERR(ubi); + + switch (cmd) { + /* Create volume command */ + case UBI_IOCMKVOL: + { + struct ubi_mkvol_req req; + + dbg_msg("create volume"); + err = __copy_from_user(&req, argp, + sizeof(struct ubi_mkvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_mkvol_req(ubi, &req); + if (err) + break; + + req.name[req.name_len] = '\0'; + + err = ubi_create_volume(ubi, &req); + if (err) + break; + + err = __put_user(req.vol_id, (__user int32_t *)argp); + if (err) + err = -EFAULT; + + break; + } + + /* Remove volume command */ + case UBI_IOCRMVOL: + { + int vol_id; + + dbg_msg("remove volume"); + err = __get_user(vol_id, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + break; + } + + err = ubi_remove_volume(desc); + if (err) + ubi_close_volume(desc); + + break; + } + + /* Re-size volume command */ + case UBI_IOCRSVOL: + { + int pebs; + uint64_t tmp; + struct ubi_rsvol_req req; + + dbg_msg("re-size volume"); + err = __copy_from_user(&req, argp, + sizeof(struct ubi_rsvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_rsvol_req(ubi, &req); + if (err) + break; + + desc = ubi_open_volume(ubi->ubi_num, req.vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); 
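
For reference, volume creation is driven from user space through UBI_IOCMKVOL. A hedged sketch using only the request fields that verify_mkvol_req() checks above; the header path, device node and concrete sizes are assumptions about the environment:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <mtd/ubi-user.h>

int main(void)
{
	struct ubi_mkvol_req req;
	int fd = open("/dev/ubi0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&req, 0, sizeof(req));
	req.vol_id = UBI_VOL_NUM_AUTO;	/* let UBI pick a slot */
	req.alignment = 1;		/* no extra LEB alignment */
	req.bytes = 1024 * 1024;	/* 1 MiB volume */
	req.vol_type = UBI_DYNAMIC_VOLUME;
	strcpy(req.name, "test");
	req.name_len = strlen(req.name);

	if (ioctl(fd, UBI_IOCMKVOL, &req) < 0)
		perror("UBI_IOCMKVOL");
	else	/* on success the kernel writes the allocated ID back */
		printf("created volume %d\n", (int)req.vol_id);
	close(fd);
	return 0;
}
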
+ break; + } + + tmp = req.bytes; + pebs = !!do_div(tmp, desc->vol->usable_leb_size); + pebs += tmp; + + err = ubi_resize_volume(desc, pebs); + ubi_close_volume(desc); + break; + } + + default: + err = -ENOTTY; + break; + } + + return err; +} + +/* UBI character device operations */ +struct file_operations ubi_cdev_operations = { + .owner = THIS_MODULE, + .ioctl = ubi_cdev_ioctl, + .llseek = no_llseek +}; + +/* UBI volume character device operations */ +struct file_operations ubi_vol_cdev_operations = { + .owner = THIS_MODULE, + .open = vol_cdev_open, + .release = vol_cdev_release, + .llseek = vol_cdev_llseek, + .read = vol_cdev_read, + .write = vol_cdev_write, + .ioctl = vol_cdev_ioctl +}; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c new file mode 100644 index 00000000000..86364221faf --- /dev/null +++ b/drivers/mtd/ubi/debug.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * Here we keep all the UBI debugging stuff which should normally be disabled + * and compiled-out, but it is extremely helpful when hunting bugs or doing big + * changes. + */ + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG + +#include "ubi.h" + +/** + * ubi_dbg_dump_ec_hdr - dump an erase counter header. + * @ec_hdr: the erase counter header to dump + */ +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) +{ + dbg_msg("erase counter header dump:"); + dbg_msg("magic %#08x", ubi32_to_cpu(ec_hdr->magic)); + dbg_msg("version %d", (int)ec_hdr->version); + dbg_msg("ec %llu", (long long)ubi64_to_cpu(ec_hdr->ec)); + dbg_msg("vid_hdr_offset %d", ubi32_to_cpu(ec_hdr->vid_hdr_offset)); + dbg_msg("data_offset %d", ubi32_to_cpu(ec_hdr->data_offset)); + dbg_msg("hdr_crc %#08x", ubi32_to_cpu(ec_hdr->hdr_crc)); + dbg_msg("erase counter header hexdump:"); + ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE); +} + +/** + * ubi_dbg_dump_vid_hdr - dump a volume identifier header. 
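
The re-size handler above converts the requested byte count into whole LEBs by ceiling division: do_div() leaves the quotient in tmp and returns the remainder, so "!!remainder + quotient" rounds up. The same arithmetic in plain C, with an illustrative LEB size:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t bytes = 1000000;	/* requested volume size */
	uint32_t leb = 129024;		/* usable LEB size, illustrative */
	uint64_t pebs = bytes / leb + !!(bytes % leb);

	printf("%llu bytes -> %llu LEBs (%llu bytes reserved)\n",
	       (unsigned long long)bytes, (unsigned long long)pebs,
	       (unsigned long long)(pebs * leb));
	return 0;
}
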
+ * @vid_hdr: the volume identifier header to dump + */ +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) +{ + dbg_msg("volume identifier header dump:"); + dbg_msg("magic %08x", ubi32_to_cpu(vid_hdr->magic)); + dbg_msg("version %d", (int)vid_hdr->version); + dbg_msg("vol_type %d", (int)vid_hdr->vol_type); + dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); + dbg_msg("compat %d", (int)vid_hdr->compat); + dbg_msg("vol_id %d", ubi32_to_cpu(vid_hdr->vol_id)); + dbg_msg("lnum %d", ubi32_to_cpu(vid_hdr->lnum)); + dbg_msg("leb_ver %u", ubi32_to_cpu(vid_hdr->leb_ver)); + dbg_msg("data_size %d", ubi32_to_cpu(vid_hdr->data_size)); + dbg_msg("used_ebs %d", ubi32_to_cpu(vid_hdr->used_ebs)); + dbg_msg("data_pad %d", ubi32_to_cpu(vid_hdr->data_pad)); + dbg_msg("sqnum %llu", + (unsigned long long)ubi64_to_cpu(vid_hdr->sqnum)); + dbg_msg("hdr_crc %08x", ubi32_to_cpu(vid_hdr->hdr_crc)); + dbg_msg("volume identifier header hexdump:"); +} + +/** + * ubi_dbg_dump_vol_info- dump volume information. + * @vol: UBI volume description object + */ +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) +{ + dbg_msg("volume information dump:"); + dbg_msg("vol_id %d", vol->vol_id); + dbg_msg("reserved_pebs %d", vol->reserved_pebs); + dbg_msg("alignment %d", vol->alignment); + dbg_msg("data_pad %d", vol->data_pad); + dbg_msg("vol_type %d", vol->vol_type); + dbg_msg("name_len %d", vol->name_len); + dbg_msg("usable_leb_size %d", vol->usable_leb_size); + dbg_msg("used_ebs %d", vol->used_ebs); + dbg_msg("used_bytes %lld", vol->used_bytes); + dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); + dbg_msg("corrupted %d", vol->corrupted); + dbg_msg("upd_marker %d", vol->upd_marker); + + if (vol->name_len <= UBI_VOL_NAME_MAX && + strnlen(vol->name, vol->name_len + 1) == vol->name_len) { + dbg_msg("name %s", vol->name); + } else { + dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); + } +} + +/** + * ubi_dbg_dump_vtbl_record - dump a &struct ubi_vtbl_record object. + * @r: the object to dump + * @idx: volume table index + */ +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) +{ + int name_len = ubi16_to_cpu(r->name_len); + + dbg_msg("volume table record %d dump:", idx); + dbg_msg("reserved_pebs %d", ubi32_to_cpu(r->reserved_pebs)); + dbg_msg("alignment %d", ubi32_to_cpu(r->alignment)); + dbg_msg("data_pad %d", ubi32_to_cpu(r->data_pad)); + dbg_msg("vol_type %d", (int)r->vol_type); + dbg_msg("upd_marker %d", (int)r->upd_marker); + dbg_msg("name_len %d", name_len); + + if (r->name[0] == '\0') { + dbg_msg("name NULL"); + return; + } + + if (name_len <= UBI_VOL_NAME_MAX && + strnlen(&r->name[0], name_len + 1) == name_len) { + dbg_msg("name %s", &r->name[0]); + } else { + dbg_msg("1st 5 characters of the name: %c%c%c%c%c", + r->name[0], r->name[1], r->name[2], r->name[3], + r->name[4]); + } + dbg_msg("crc %#08x", ubi32_to_cpu(r->crc)); +} + +/** + * ubi_dbg_dump_sv - dump a &struct ubi_scan_volume object. 
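
The dump helpers above print a volume name only when strnlen() proves it is NUL-terminated within the declared length, falling back to the first few raw characters otherwise. A stand-alone sketch of that guard; the name-length limit is assumed to match UBI's:

#include <stdio.h>
#include <string.h>

#define UBI_VOL_NAME_MAX 127	/* assumed UBI limit */

static void dump_name(const char *name, int name_len)
{
	if (name_len <= UBI_VOL_NAME_MAX &&
	    strnlen(name, name_len + 1) == name_len)
		printf("name %s\n", name);
	else	/* inconsistent record: print raw characters only */
		printf("first 5 characters: %.5s\n", name);
}

int main(void)
{
	dump_name("rootfs", 6);		/* sane record */
	dump_name("garbage-no-nul", 3);	/* name_len does not match */
	return 0;
}
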
+ * @sv: the object to dump + */ +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) +{ + dbg_msg("volume scanning information dump:"); + dbg_msg("vol_id %d", sv->vol_id); + dbg_msg("highest_lnum %d", sv->highest_lnum); + dbg_msg("leb_count %d", sv->leb_count); + dbg_msg("compat %d", sv->compat); + dbg_msg("vol_type %d", sv->vol_type); + dbg_msg("used_ebs %d", sv->used_ebs); + dbg_msg("last_data_size %d", sv->last_data_size); + dbg_msg("data_pad %d", sv->data_pad); +} + +/** + * ubi_dbg_dump_seb - dump a &struct ubi_scan_leb object. + * @seb: the object to dump + * @type: object type: 0 - not corrupted, 1 - corrupted + */ +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) +{ + dbg_msg("eraseblock scanning information dump:"); + dbg_msg("ec %d", seb->ec); + dbg_msg("pnum %d", seb->pnum); + if (type == 0) { + dbg_msg("lnum %d", seb->lnum); + dbg_msg("scrub %d", seb->scrub); + dbg_msg("sqnum %llu", seb->sqnum); + dbg_msg("leb_ver %u", seb->leb_ver); + } +} + +/** + * ubi_dbg_dump_mkvol_req - dump a &struct ubi_mkvol_req object. + * @req: the object to dump + */ +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) +{ + char nm[17]; + + dbg_msg("volume creation request dump:"); + dbg_msg("vol_id %d", req->vol_id); + dbg_msg("alignment %d", req->alignment); + dbg_msg("bytes %lld", (long long)req->bytes); + dbg_msg("vol_type %d", req->vol_type); + dbg_msg("name_len %d", req->name_len); + + memcpy(nm, req->name, 16); + nm[16] = 0; + dbg_msg("the 1st 16 characters of the name: %s", nm); +} + +#define BYTES_PER_LINE 32 + +/** + * ubi_dbg_hexdump - dump a buffer. + * @ptr: the buffer to dump + * @size: buffer size which must be multiple of 4 bytes + */ +void ubi_dbg_hexdump(const void *ptr, int size) +{ + int i, k = 0, rows, columns; + const uint8_t *p = ptr; + + size = ALIGN(size, 4); + rows = size/BYTES_PER_LINE + size % BYTES_PER_LINE; + for (i = 0; i < rows; i++) { + int j; + + cond_resched(); + columns = min(size - k, BYTES_PER_LINE) / 4; + if (columns == 0) + break; + printk(KERN_DEBUG "%5d: ", i * BYTES_PER_LINE); + for (j = 0; j < columns; j++) { + int n, N; + + N = size - k > 4 ? 4 : size - k; + for (n = 0; n < N; n++) + printk("%02x", p[k++]); + printk(" "); + } + printk("\n"); + } +} + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h new file mode 100644 index 00000000000..f816ad9a36c --- /dev/null +++ b/drivers/mtd/ubi/debug.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_DEBUG_H__ +#define __UBI_DEBUG_H__ + +#ifdef CONFIG_MTD_UBI_DEBUG +#include <linux/random.h> + +#define ubi_assert(expr) BUG_ON(!(expr)) +#define dbg_err(fmt, ...) 
ubi_err(fmt, ##__VA_ARGS__) +#else +#define ubi_assert(expr) ({}) +#define dbg_err(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +#define DBG_DISABLE_BGT 1 +#else +#define DBG_DISABLE_BGT 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG +/* Generic debugging message */ +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", __FUNCTION__, ##__VA_ARGS__) + +#define ubi_dbg_dump_stack() dump_stack() + +struct ubi_ec_hdr; +struct ubi_vid_hdr; +struct ubi_volume; +struct ubi_vtbl_record; +struct ubi_scan_volume; +struct ubi_scan_leb; +struct ubi_mkvol_req; + +void ubi_dbg_print(int type, const char *func, const char *fmt, ...); +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol); +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); +void ubi_dbg_hexdump(const void *buf, int size); + +#else + +#define dbg_msg(fmt, ...) ({}) +#define ubi_dbg_dump_stack() ({}) +#define ubi_dbg_print(func, fmt, ...) ({}) +#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +#define ubi_dbg_dump_vol_info(vol) ({}) +#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +#define ubi_dbg_dump_sv(sv) ({}) +#define ubi_dbg_dump_seb(seb, type) ({}) +#define ubi_dbg_dump_mkvol_req(req) ({}) +#define ubi_dbg_hexdump(buf, size) ({}) + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +/* Messages from the eraseblock association unit */ +#define dbg_eba(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG eba: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_eba(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +/* Messages from the wear-leveling unit */ +#define dbg_wl(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG wl: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_wl(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +/* Messages from the input/output unit */ +#define dbg_io(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG io: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_io(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +/* Initialization and build messages */ +#define dbg_bld(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG bld: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_bld(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS +/** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * + * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. + */ +static inline int ubi_dbg_is_bitflip(void) +{ + return !(random32() % 200); +} +#else +#define ubi_dbg_is_bitflip() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES +/** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * + * Returns non-zero if a write failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_write_failure(void) +{ + return !(random32() % 500); +} +#else +#define ubi_dbg_is_write_failure() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES +/** + * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * + * Returns non-zero if an erase failure should be emulated, otherwise returns + * zero. 
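
The emulation helpers in this header fire with probability 1/200 (bit-flips), 1/500 (write failures) and 1/400 (erase failures). A quick user-space check of the expected rate, with rand() standing in for random32():

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int i, hits = 0, trials = 1000000;

	srand(1);
	for (i = 0; i < trials; i++)
		if (!(rand() % 200))	/* mirrors ubi_dbg_is_bitflip() */
			hits++;
	printf("emulated bit-flips: %d in %d reads (~%.2f%%)\n",
	       hits, trials, 100.0 * hits / trials);
	return 0;
}
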
+ */
+static inline int ubi_dbg_is_erase_failure(void)
+{
+	return !(random32() % 400);
+}
+#else
+#define ubi_dbg_is_erase_failure() 0
+#endif
+
+#endif /* !__UBI_DEBUG_H__ */
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
new file mode 100644
index 00000000000..d847ee1da3d
--- /dev/null
+++ b/drivers/mtd/ubi/eba.c
@@ -0,0 +1,1241 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * The UBI Eraseblock Association (EBA) unit.
+ *
+ * This unit is responsible for I/O to/from logical eraseblocks.
+ *
+ * Although in this implementation the EBA table is fully kept and managed in
+ * RAM, which limits scalability, it might be (partially) maintained on flash
+ * in future implementations.
+ *
+ * The EBA unit implements per-logical eraseblock locking. Before accessing a
+ * logical eraseblock it is locked for reading or writing. The per-logical
+ * eraseblock locking is implemented by means of the lock tree. The lock tree
+ * is an RB-tree which contains all the currently locked logical eraseblocks.
+ * The lock tree elements are &struct ltree_entry objects. They are indexed by
+ * (@vol_id, @lnum) pairs.
+ *
+ * EBA also maintains the global sequence counter, which is incremented each
+ * time a logical eraseblock is mapped to a physical eraseblock; the counter
+ * value is stored in the volume identifier header. This means that each VID
+ * header has a unique sequence number. The sequence number only ever
+ * increases, and we assume 64 bits is enough for it to never overflow.
+ */
+
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include "ubi.h"
+
+/**
+ * struct ltree_entry - an entry in the lock tree.
+ * @rb: links RB-tree nodes
+ * @vol_id: volume ID of the locked logical eraseblock
+ * @lnum: locked logical eraseblock number
+ * @users: how many tasks are using this logical eraseblock or waiting for it
+ * @mutex: read/write mutex to implement read/write access serialization to
+ * the (@vol_id, @lnum) logical eraseblock
+ *
+ * When a logical eraseblock is locked, the corresponding &struct ltree_entry
+ * object is inserted into the lock tree (@ubi->ltree).
+ */
+struct ltree_entry {
+	struct rb_node rb;
+	int vol_id;
+	int lnum;
+	int users;
+	struct rw_semaphore mutex;
+};
+
+/* Slab cache for lock-tree entries */
+static struct kmem_cache *ltree_slab;
+
+/**
+ * next_sqnum - get next sequence number.
+ * @ubi: UBI device description object
+ *
+ * This function returns the next sequence number to use, which is just the
+ * current global sequence counter value. It also increases the global
+ * sequence counter.
+ */ +static unsigned long long next_sqnum(struct ubi_device *ubi) +{ + unsigned long long sqnum; + + spin_lock(&ubi->ltree_lock); + sqnum = ubi->global_sqnum++; + spin_unlock(&ubi->ltree_lock); + + return sqnum; +} + +/** + * ubi_get_compat - get compatibility flags of a volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function returns compatibility flags for an internal volume. User + * volumes have no compatibility flags, so %0 is returned. + */ +static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id == UBI_LAYOUT_VOL_ID) + return UBI_LAYOUT_VOLUME_COMPAT; + return 0; +} + +/** + * ltree_lookup - look up the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function returns a pointer to the corresponding &struct ltree_entry + * object if the logical eraseblock is locked and %NULL if it is not. + * @ubi->ltree_lock has to be locked. + */ +static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct rb_node *p; + + p = ubi->ltree.rb_node; + while (p) { + struct ltree_entry *le; + + le = rb_entry(p, struct ltree_entry, rb); + + if (vol_id < le->vol_id) + p = p->rb_left; + else if (vol_id > le->vol_id) + p = p->rb_right; + else { + if (lnum < le->lnum) + p = p->rb_left; + else if (lnum > le->lnum) + p = p->rb_right; + else + return le; + } + } + + return NULL; +} + +/** + * ltree_add_entry - add new entry to the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the + * lock tree. If such entry is already there, its usage counter is increased. + * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation + * failed. + */ +static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct ltree_entry *le, *le1, *le_free; + + le = kmem_cache_alloc(ltree_slab, GFP_KERNEL); + if (!le) + return ERR_PTR(-ENOMEM); + + le->vol_id = vol_id; + le->lnum = lnum; + + spin_lock(&ubi->ltree_lock); + le1 = ltree_lookup(ubi, vol_id, lnum); + + if (le1) { + /* + * This logical eraseblock is already locked. The newly + * allocated lock entry is not needed. + */ + le_free = le; + le = le1; + } else { + struct rb_node **p, *parent = NULL; + + /* + * No lock entry, add the newly allocated one to the + * @ubi->ltree RB-tree. + */ + le_free = NULL; + + p = &ubi->ltree.rb_node; + while (*p) { + parent = *p; + le1 = rb_entry(parent, struct ltree_entry, rb); + + if (vol_id < le1->vol_id) + p = &(*p)->rb_left; + else if (vol_id > le1->vol_id) + p = &(*p)->rb_right; + else { + ubi_assert(lnum != le1->lnum); + if (lnum < le1->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&le->rb, parent, p); + rb_insert_color(&le->rb, &ubi->ltree); + } + le->users += 1; + spin_unlock(&ubi->ltree_lock); + + if (le_free) + kmem_cache_free(ltree_slab, le_free); + + return le; +} + +/** + * leb_read_lock - lock logical eraseblock for reading. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for reading. Returns zero in case + * of success and a negative error code in case of failure. 
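
The lock-tree entries used by the leb_*_lock()/_unlock() helpers here are reference-counted: the first locker allocates the entry, and whoever drops @users to zero frees it. A compilable sketch of that life cycle, with a single slot standing in for the RB-tree and a pthread rwlock for the kernel rw_semaphore (build with -pthread):

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

struct entry {
	int users;
	pthread_rwlock_t mutex;
};

static struct entry *get_entry(struct entry **slot)
{
	if (!*slot) {			/* first locker allocates */
		*slot = calloc(1, sizeof(**slot));
		pthread_rwlock_init(&(*slot)->mutex, NULL);
	}
	(*slot)->users++;
	return *slot;
}

static void put_entry(struct entry **slot)
{
	struct entry *e = *slot;

	if (--e->users == 0) {		/* last user frees the entry */
		*slot = NULL;
		pthread_rwlock_destroy(&e->mutex);
		free(e);
	}
}

int main(void)
{
	struct entry *slot = NULL, *e = get_entry(&slot);

	pthread_rwlock_rdlock(&e->mutex);	/* like leb_read_lock() */
	pthread_rwlock_unlock(&e->mutex);	/* like leb_read_unlock() */
	put_entry(&slot);
	printf("entry freed: %s\n", slot ? "no" : "yes");
	return 0;
}
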
+ */ +static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_read(&le->mutex); + return 0; +} + +/** + * leb_read_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int free = 0; + struct ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + free = 1; + } + spin_unlock(&ubi->ltree_lock); + + up_read(&le->mutex); + if (free) + kmem_cache_free(ltree_slab, le); +} + +/** + * leb_write_lock - lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_write(&le->mutex); + return 0; +} + +/** + * leb_write_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int free; + struct ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + free = 1; + } else + free = 0; + spin_unlock(&ubi->ltree_lock); + + up_write(&le->mutex); + if (free) + kmem_cache_free(ltree_slab, le); +} + +/** + * ubi_eba_unmap_leb - un-map logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules corresponding + * physical eraseblock for erasure. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum) +{ + int idx = vol_id2idx(ubi, vol_id), err, pnum; + struct ubi_volume *vol = ubi->volumes[idx]; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) + /* This logical eraseblock is already unmapped */ + goto out_unlock; + + dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); + + vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; + err = ubi_wl_put_peb(ubi, pnum, 0); + +out_unlock: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * ubi_eba_read_leb - read data. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: buffer to store the read data + * @offset: offset from where to read + * @len: how many bytes to read + * @check: data CRC check flag + * + * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF + * bytes. The @check flag only makes sense for static volumes and forces + * eraseblock data CRC checking. + * + * In case of success this function returns zero. In case of a static volume, + * if data CRC mismatches - %-EBADMSG is returned. 
%-EBADMSG may also be
+ * returned for any volume type if an ECC error was detected by the MTD device
+ * driver. Other negative error codes may be returned in case of other errors.
+ */
+int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf,
+		     int offset, int len, int check)
+{
+	int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id);
+	struct ubi_vid_hdr *vid_hdr;
+	struct ubi_volume *vol = ubi->volumes[idx];
+	uint32_t crc, crc1;
+
+	err = leb_read_lock(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum < 0) {
+		/*
+		 * The logical eraseblock is not mapped, fill the whole buffer
+		 * with 0xFF bytes. The exception is static volumes for which
+		 * it is an error to read unmapped logical eraseblocks.
+		 */
+		dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)",
+			len, offset, vol_id, lnum);
+		leb_read_unlock(ubi, vol_id, lnum);
+		ubi_assert(vol->vol_type != UBI_STATIC_VOLUME);
+		memset(buf, 0xFF, len);
+		return 0;
+	}
+
+	dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d",
+		len, offset, vol_id, lnum, pnum);
+
+	if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+		check = 0;
+
+retry:
+	if (check) {
+		vid_hdr = ubi_zalloc_vid_hdr(ubi);
+		if (!vid_hdr) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1);
+		if (err && err != UBI_IO_BITFLIPS) {
+			if (err > 0) {
+				/*
+				 * The header is either absent or corrupted.
+				 * The former case means there is a bug -
+				 * switch to read-only mode just in case.
+				 * The latter case means a real corruption - we
+				 * may try to recover data. FIXME: but this is
+				 * not implemented.
+				 */
+				if (err == UBI_IO_BAD_VID_HDR) {
+					ubi_warn("bad VID header at PEB %d, "
+						 "LEB %d:%d", pnum, vol_id,
+						 lnum);
+					err = -EBADMSG;
+				} else
+					ubi_ro_mode(ubi);
+			}
+			goto out_free;
+		} else if (err == UBI_IO_BITFLIPS)
+			scrub = 1;
+
+		ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs));
+		ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size));
+
+		crc = ubi32_to_cpu(vid_hdr->data_crc);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+	}
+
+	err = ubi_io_read_data(ubi, buf, pnum, offset, len);
+	if (err) {
+		if (err == UBI_IO_BITFLIPS) {
+			scrub = 1;
+			err = 0;
+		} else if (err == -EBADMSG) {
+			if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+				goto out_unlock;
+			scrub = 1;
+			if (!check) {
+				ubi_msg("force data checking");
+				check = 1;
+				goto retry;
+			}
+		} else
+			goto out_unlock;
+	}
+
+	if (check) {
+		crc1 = crc32(UBI_CRC32_INIT, buf, len);
+		if (crc1 != crc) {
+			ubi_warn("CRC error: calculated %#08x, must be %#08x",
+				 crc1, crc);
+			err = -EBADMSG;
+			goto out_unlock;
+		}
+	}
+
+	if (scrub)
+		err = ubi_wl_scrub_peb(ubi, pnum);
+
+	leb_read_unlock(ubi, vol_id, lnum);
+	return err;
+
+out_free:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+out_unlock:
+	leb_read_unlock(ubi, vol_id, lnum);
+	return err;
+}
+
+/**
+ * recover_peb - recover from write failure.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to recover
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data which was not written because of the write failure
+ * @offset: offset of the failed write
+ * @len: how many bytes should have been written
+ *
+ * This function is called in case of a write failure and moves all good data
+ * from the potentially bad physical eraseblock to a good physical eraseblock.
+ * This function also writes the data which was not written due to the failure.
+ * Returns new physical eraseblock number in case of success, and a negative
+ * error code in case of failure.
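
The static-volume check in ubi_eba_read_leb() above recomputes CRC32 over the payload (seed UBI_CRC32_INIT, no final inversion, as with the kernel's crc32()) and compares it with data_crc from the VID header. A self-contained illustration using a bit-by-bit CRC and a simulated corrupted read:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define UBI_CRC32_INIT 0xFFFFFFFFU

static uint32_t crc32_le(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320U : 0);
	}
	return crc;
}

int main(void)
{
	char data[] = "logical eraseblock payload";
	uint32_t stored = crc32_le(UBI_CRC32_INIT, data, sizeof(data) - 1);
	uint32_t crc1;

	data[0] ^= 0x01;	/* simulate a corrupted read */
	crc1 = crc32_le(UBI_CRC32_INIT, data, sizeof(data) - 1);
	if (crc1 != stored)
		printf("CRC error: calculated %#08x, must be %#08x\n",
		       (unsigned)crc1, (unsigned)stored);
	return 0;
}
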
+ */ +static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, + const void *buf, int offset, int len) +{ + int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + unsigned char *new_buf; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) { + return -ENOMEM; + } + +retry: + new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); + if (new_pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + return new_pnum; + } + + ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum); + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) + err = -EIO; + goto out_put; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); + if (err) + goto write_error; + + data_size = offset + len; + new_buf = kmalloc(data_size, GFP_KERNEL); + if (!new_buf) { + err = -ENOMEM; + goto out_put; + } + memset(new_buf + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { + err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset); + if (err && err != UBI_IO_BITFLIPS) { + kfree(new_buf); + goto out_put; + } + } + + memcpy(new_buf + offset, buf, len); + + err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size); + if (err) { + kfree(new_buf); + goto write_error; + } + + kfree(new_buf); + ubi_free_vid_hdr(ubi, vid_hdr); + + vol->eba_tbl[lnum] = new_pnum; + ubi_wl_put_peb(ubi, pnum, 1); + + ubi_msg("data was successfully recovered"); + return 0; + +out_put: + ubi_wl_put_peb(ubi, new_pnum, 1); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + /* + * Bad luck? This physical eraseblock is bad too? Crud. Let's try to + * get another one. + */ + ubi_warn("failed to write to PEB %d", new_pnum); + ubi_wl_put_peb(ubi, new_pnum, 1); + if (++tries > UBI_IO_RETRIES) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + ubi_msg("try again"); + goto retry; +} + +/** + * ubi_eba_write_leb - write data to dynamic volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: the data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * @dtype: data type + * + * This function writes data to logical eraseblock @lnum of a dynamic volume + * @vol_id. Returns zero in case of success and a negative error code in case + * of failure. In case of error, it is possible that something was still + * written to the flash media, but may be some garbage. + */ +int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int offset, int len, int dtype) +{ + int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum >= 0) { + dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn("failed to write data to PEB %d", pnum); + if (err == -EIO && ubi->bad_allowed) + err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len); + if (err) + ubi_ro_mode(ubi); + } + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + /* + * The logical eraseblock is not mapped. 
We have to get a free physical
+	 * eraseblock and write the volume identifier header there first.
+	 */
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr) {
+		leb_write_unlock(ubi, vol_id, lnum);
+		return -ENOMEM;
+	}
+
+	vid_hdr->vol_type = UBI_VID_DYNAMIC;
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
+	vid_hdr->lnum = cpu_to_ubi32(lnum);
+	vid_hdr->compat = ubi_get_compat(ubi, vol_id);
+	vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+
+retry:
+	pnum = ubi_wl_get_peb(ubi, dtype);
+	if (pnum < 0) {
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		leb_write_unlock(ubi, vol_id, lnum);
+		return pnum;
+	}
+
+	dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d",
+		len, offset, vol_id, lnum, pnum);
+
+	err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr);
+	if (err) {
+		ubi_warn("failed to write VID header to LEB %d:%d, PEB %d",
+			 vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	err = ubi_io_write_data(ubi, buf, pnum, offset, len);
+	if (err) {
+		ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, "
+			 "PEB %d", len, offset, vol_id, lnum, pnum);
+		goto write_error;
+	}
+
+	vol->eba_tbl[lnum] = pnum;
+
+	leb_write_unlock(ubi, vol_id, lnum);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
+
+write_error:
+	if (err != -EIO || !ubi->bad_allowed) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	/*
+	 * Fortunately, this is the first write operation to this physical
+	 * eraseblock, so just put it and request a new one. We assume that if
+	 * this physical eraseblock went bad, the erase code will handle that.
+	 */
+	err = ubi_wl_put_peb(ubi, pnum, 1);
+	if (err || ++tries > UBI_IO_RETRIES) {
+		ubi_ro_mode(ubi);
+		leb_write_unlock(ubi, vol_id, lnum);
+		ubi_free_vid_hdr(ubi, vid_hdr);
+		return err;
+	}
+
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+	ubi_msg("try another PEB");
+	goto retry;
+}
+
+/**
+ * ubi_eba_write_leb_st - write data to static volume.
+ * @ubi: UBI device description object
+ * @vol_id: volume ID
+ * @lnum: logical eraseblock number
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: data type
+ * @used_ebs: how many logical eraseblocks will this volume contain
+ *
+ * This function writes data to logical eraseblock @lnum of static volume
+ * @vol_id. The @used_ebs argument should contain the total number of logical
+ * eraseblocks in this static volume.
+ *
+ * When writing to the last logical eraseblock, the @len argument doesn't have
+ * to be aligned to the minimal I/O unit size. Instead, it has to be equal to
+ * the real data size, although the @buf buffer still has to be padded out to
+ * the aligned length. In all other cases, @len has to be aligned.
+ *
+ * It is prohibited to write more than once to logical eraseblocks of static
+ * volumes. This function returns zero in case of success and a negative error
+ * code in case of failure.
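
All the write paths in this file share one recovery pattern: on -EIO the suspect PEB is handed back to wear-leveling (for torture and possible bad-block marking) and a fresh one is tried, at most UBI_IO_RETRIES times before giving up. A condensed stand-alone sketch; write_peb() is a hypothetical stub and the retry limit is assumed to match UBI's constant:

#include <stdio.h>
#include <errno.h>

#define UBI_IO_RETRIES 3	/* assumed to match the UBI constant */

static int write_peb(int pnum) { return pnum < 2 ? -EIO : 0; } /* stub */

int main(void)
{
	int pnum = 0, tries = 0, err;

	for (;;) {
		err = write_peb(pnum);
		if (err != -EIO || ++tries > UBI_IO_RETRIES)
			break;		/* success or give up */
		printf("failed to write to PEB %d, try another\n", pnum);
		pnum++;			/* ubi_wl_get_peb() in UBI */
	}
	printf("result %d after %d retries\n", err, tries);
	return 0;
}
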
+ */ +int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype, int used_ebs) +{ + int err, pnum, tries = 0, data_size = len; + int idx = vol_id2idx(ubi, vol_id); + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (lnum == used_ebs - 1) + /* If this is the last LEB @len may be unaligned */ + len = ALIGN(data_size, ubi->min_io_size); + else + ubi_assert(len % ubi->min_io_size == 0); + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_ubi32(vol_id); + vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, data_size); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_ubi32(data_size); + vid_hdr->used_ebs = cpu_to_ubi32(used_ebs); + vid_hdr->data_crc = cpu_to_ubi32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d", + len, vol_id, lnum, pnum, used_ebs); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + ubi_assert(vol->eba_tbl[lnum] < 0); + vol->eba_tbl[lnum] = pnum; + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/* + * ubi_eba_atomic_leb_change - change logical eraseblock atomically. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @dtype: data type + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. This function guarantees that in case of an + * unclean reboot the old contents is preserved. Returns zero in case of + * success and a negative error code in case of failure. 
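+ *
+ * Roughly, the sequence implemented below is (a summary of the code, not an
+ * extra API):
+ *
+ *	pnum = ubi_wl_get_peb(...)	- get a fresh physical eraseblock
+ *	write the VID header (copy_flag = 1, data_crc set) to pnum
+ *	write the new data to pnum
+ *	ubi_wl_put_peb(old PEB)		- schedule the old copy for erasure
+ *	vol->eba_tbl[lnum] = pnum
+ *
+ * If an unclean reboot interrupts this, both the old and the new copy may
+ * exist on flash; scanning prefers the copy with the higher sequence number
+ * and a valid data CRC, so one consistent version always survives.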
+ */ +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype) +{ + int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id); + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_ubi32(vol_id); + vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, len); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_ubi32(len); + vid_hdr->copy_flag = 1; + vid_hdr->data_crc = cpu_to_ubi32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d", + vol_id, lnum, vol->eba_tbl[lnum], pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + vol->eba_tbl[lnum] = pnum; + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/** + * ltree_entry_ctor - lock tree entries slab cache constructor. + * @obj: the lock-tree entry to construct + * @cache: the lock tree entry slab cache + * @flags: constructor flags + */ +static void ltree_entry_ctor(void *obj, struct kmem_cache *cache, + unsigned long flags) +{ + struct ltree_entry *le = obj; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) != + SLAB_CTOR_CONSTRUCTOR) + return; + + le->users = 0; + init_rwsem(&le->mutex); +} + +/** + * ubi_eba_copy_leb - copy logical eraseblock. + * @ubi: UBI device description object + * @from: physical eraseblock number from where to copy + * @to: physical eraseblock number where to copy + * @vid_hdr: VID header of the @from physical eraseblock + * + * This function copies logical eraseblock from physical eraseblock @from to + * physical eraseblock @to. The @vid_hdr buffer may be changed by this + * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation + * was canceled because bit-flips were detected at the target PEB, and a + * negative error code in case of failure. 
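+ *
+ * The overall protocol, as implemented below, is: lock the LEB against
+ * writers, re-check under @ubi->volumes_lock that the LEB is still mapped to
+ * @from, read the data (trimming trailing 0xFFs for dynamic volumes), write
+ * the VID header and data to @to, read both back to verify them, and only
+ * then re-map the LEB in @vol->eba_tbl.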
+ */
+int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
+		     struct ubi_vid_hdr *vid_hdr)
+{
+	int err, vol_id, lnum, data_size, aldata_size, pnum, idx;
+	struct ubi_volume *vol;
+	uint32_t crc;
+	void *buf, *buf1 = NULL;
+
+	vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	lnum = ubi32_to_cpu(vid_hdr->lnum);
+
+	dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);
+
+	if (vid_hdr->vol_type == UBI_VID_STATIC) {
+		data_size = ubi32_to_cpu(vid_hdr->data_size);
+		aldata_size = ALIGN(data_size, ubi->min_io_size);
+	} else
+		data_size = aldata_size =
+			ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad);
+
+	buf = kmalloc(aldata_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/*
+	 * We do not want anybody to write to this logical eraseblock while we
+	 * are moving it, so we lock it.
+	 */
+	err = leb_write_lock(ubi, vol_id, lnum);
+	if (err) {
+		kfree(buf);
+		return err;
+	}
+
+	/*
+	 * But the logical eraseblock might have been unmapped in the
+	 * meantime; if so, cancel the operation.
+	 */
+	idx = vol_id2idx(ubi, vol_id);
+
+	/*
+	 * We may race with volume deletion/re-size, so we have to hold
+	 * @ubi->volumes_lock.
+	 */
+	spin_lock(&ubi->volumes_lock);
+	vol = ubi->volumes[idx];
+	if (!vol) {
+		dbg_eba("volume %d was removed meanwhile", vol_id);
+		spin_unlock(&ubi->volumes_lock);
+		goto out_unlock;
+	}
+
+	pnum = vol->eba_tbl[lnum];
+	if (pnum != from) {
+		dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
+			"PEB %d, cancel", vol_id, lnum, from, pnum);
+		spin_unlock(&ubi->volumes_lock);
+		goto out_unlock;
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	/* OK, now the LEB is locked and we can safely start moving it */
+
+	dbg_eba("read %d bytes of data", aldata_size);
+	err = ubi_io_read_data(ubi, buf, from, 0, aldata_size);
+	if (err && err != UBI_IO_BITFLIPS) {
+		ubi_warn("error %d while reading data from PEB %d",
+			 err, from);
+		goto out_unlock;
+	}
+
+	/*
+	 * Now we have got to calculate how much data we have to copy. In
+	 * case of a static volume it is fairly easy - the VID header contains
+	 * the data size. In case of a dynamic volume it is more difficult - we
+	 * have to read the contents, cut 0xFF bytes from the end and copy only
+	 * the first part. We must do this to avoid writing 0xFF bytes as it
+	 * may have some side-effects. And not only this. It is important not
+	 * to include those 0xFFs in the CRC because later they may be filled
+	 * by data.
+	 */
+	if (vid_hdr->vol_type == UBI_VID_DYNAMIC)
+		aldata_size = data_size =
+			ubi_calc_data_len(ubi, buf, data_size);
+
+	cond_resched();
+	crc = crc32(UBI_CRC32_INIT, buf, data_size);
+	cond_resched();
+
+	/*
+	 * It may turn out that the whole @from physical eraseblock contains
+	 * only 0xFF bytes. Then we have to write only the VID header and no
+	 * data. This also means we should not set @vid_hdr->copy_flag,
+	 * @vid_hdr->data_size, and @vid_hdr->data_crc.
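+	 *
+	 * For example (hypothetical numbers): if only the first 512 bytes of
+	 * a dynamic LEB differ from 0xFF, only those 512 bytes are copied and
+	 * CRC-ed; if the user later writes at offset 512, the stored
+	 * @vid_hdr->data_crc remains valid because it never covered the
+	 * trailing 0xFF bytes.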
+	 */
+	if (data_size > 0) {
+		vid_hdr->copy_flag = 1;
+		vid_hdr->data_size = cpu_to_ubi32(data_size);
+		vid_hdr->data_crc = cpu_to_ubi32(crc);
+	}
+	vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+
+	err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
+	if (err)
+		goto out_unlock;
+
+	cond_resched();
+
+	/* Read the VID header back and check if it was written correctly */
+	err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1);
+	if (err) {
+		if (err != UBI_IO_BITFLIPS)
+			ubi_warn("cannot read VID header back from PEB %d", to);
+		goto out_unlock;
+	}
+
+	if (data_size > 0) {
+		err = ubi_io_write_data(ubi, buf, to, 0, aldata_size);
+		if (err)
+			goto out_unlock;
+
+		/*
+		 * We've written the data and are going to read it back to make
+		 * sure it was written correctly.
+		 */
+		buf1 = kmalloc(aldata_size, GFP_KERNEL);
+		if (!buf1) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+
+		cond_resched();
+
+		err = ubi_io_read_data(ubi, buf1, to, 0, aldata_size);
+		if (err) {
+			if (err != UBI_IO_BITFLIPS)
+				ubi_warn("cannot read data back from PEB %d",
+					 to);
+			goto out_unlock;
+		}
+
+		cond_resched();
+
+		if (memcmp(buf, buf1, aldata_size)) {
+			ubi_warn("read data back from PEB %d - it is different",
+				 to);
+			err = -EINVAL;
+			goto out_unlock;
+		}
+	}
+
+	ubi_assert(vol->eba_tbl[lnum] == from);
+	vol->eba_tbl[lnum] = to;
+
+	leb_write_unlock(ubi, vol_id, lnum);
+	kfree(buf);
+	kfree(buf1);
+
+	return 0;
+
+out_unlock:
+	leb_write_unlock(ubi, vol_id, lnum);
+	kfree(buf);
+	kfree(buf1);
+	return err;
+}
+
+/**
+ * ubi_eba_init_scan - initialize the EBA unit using scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int i, j, err, num_volumes;
+	struct ubi_scan_volume *sv;
+	struct ubi_volume *vol;
+	struct ubi_scan_leb *seb;
+	struct rb_node *rb;
+
+	dbg_eba("initialize EBA unit");
+
+	spin_lock_init(&ubi->ltree_lock);
+	ubi->ltree = RB_ROOT;
+
+	if (ubi_devices_cnt == 0) {
+		ltree_slab = kmem_cache_create("ubi_ltree_slab",
+					       sizeof(struct ltree_entry), 0,
+					       0, &ltree_entry_ctor, NULL);
+		if (!ltree_slab)
+			return -ENOMEM;
+	}
+
+	ubi->global_sqnum = si->max_sqnum + 1;
+	num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
+
+	for (i = 0; i < num_volumes; i++) {
+		vol = ubi->volumes[i];
+		if (!vol)
+			continue;
+
+		cond_resched();
+
+		vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int),
+				       GFP_KERNEL);
+		if (!vol->eba_tbl) {
+			err = -ENOMEM;
+			goto out_free;
+		}
+
+		for (j = 0; j < vol->reserved_pebs; j++)
+			vol->eba_tbl[j] = UBI_LEB_UNMAPPED;
+
+		sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i));
+		if (!sv)
+			continue;
+
+		ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
+			if (seb->lnum >= vol->reserved_pebs)
+				/*
+				 * This may happen in case of an unclean reboot
+				 * during re-size.
+				 */
+				ubi_scan_move_to_list(sv, seb, &si->erase);
+			else
+				vol->eba_tbl[seb->lnum] = seb->pnum;
+		}
+	}
+
+	if (ubi->bad_allowed) {
+		ubi_calculate_reserved(ubi);
+
+		if (ubi->avail_pebs < ubi->beb_rsvd_level) {
+			/* Not enough free physical eraseblocks */
+			ubi->beb_rsvd_pebs = ubi->avail_pebs;
+			ubi_warn("cannot reserve enough PEBs for bad PEB "
+				 "handling, reserved %d, need %d",
+				 ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
+		} else
+			ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
+
+		ubi->avail_pebs -= ubi->beb_rsvd_pebs;
+		ubi->rsvd_pebs += ubi->beb_rsvd_pebs;
+	}
+
+	dbg_eba("EBA unit is initialized");
+	return 0;
+
+out_free:
+	for (i = 0; i < num_volumes; i++) {
+		if (!ubi->volumes[i])
+			continue;
+		kfree(ubi->volumes[i]->eba_tbl);
+	}
+	if (ubi_devices_cnt == 0)
+		kmem_cache_destroy(ltree_slab);
+	return err;
+}
+
+/**
+ * ubi_eba_close - close EBA unit.
+ * @ubi: UBI device description object
+ */
+void ubi_eba_close(const struct ubi_device *ubi)
+{
+	int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT;
+
+	dbg_eba("close EBA unit");
+
+	for (i = 0; i < num_volumes; i++) {
+		if (!ubi->volumes[i])
+			continue;
+		kfree(ubi->volumes[i]->eba_tbl);
+	}
+	if (ubi_devices_cnt == 1)
+		kmem_cache_destroy(ltree_slab);
+}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
new file mode 100644
index 00000000000..fc9478d605f
--- /dev/null
+++ b/drivers/mtd/ubi/gluebi.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём), Joern Engel
+ */
+
+/*
+ * This file includes implementation of fake MTD devices for each UBI volume.
+ * This sounds strange, but it is in fact quite useful for making MTD-oriented
+ * software (including all the legacy software) work on top of UBI.
+ *
+ * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit
+ * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The
+ * eraseblock size is equivalent to the logical eraseblock size of the volume.
+ */
+
+#include <asm/div64.h>
+#include "ubi.h"
+
+/**
+ * gluebi_get_device - get MTD device reference.
+ * @mtd: the MTD device description object
+ *
+ * This function is called every time the MTD device is being opened and
+ * implements the MTD get_device() operation. Returns zero in case of success
+ * and a negative error code in case of failure.
+ */
+static int gluebi_get_device(struct mtd_info *mtd)
+{
+	struct ubi_volume *vol;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+
+	/*
+	 * We do not introduce locks for gluebi reference count because the
+	 * get_device()/put_device() calls are already serialized at MTD.
+	 */
+	if (vol->gluebi_refcount > 0) {
+		/*
+		 * The MTD device is already referenced and this is just one
+		 * more reference.
+		 * MTD allows many users to open the same volume
+		 * simultaneously and does not distinguish between
+		 * readers/writers/exclusive openers as UBI does. So we do not
+		 * open the UBI volume again - just increase the reference
+		 * counter and return.
+		 */
+		vol->gluebi_refcount += 1;
+		return 0;
+	}
+
+	/*
+	 * This is the first reference to this UBI volume via the MTD device
+	 * interface. Open the corresponding volume in read-write mode.
+	 */
+	vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id,
+					   UBI_READWRITE);
+	if (IS_ERR(vol->gluebi_desc))
+		return PTR_ERR(vol->gluebi_desc);
+	vol->gluebi_refcount += 1;
+	return 0;
+}
+
+/**
+ * gluebi_put_device - put MTD device reference.
+ * @mtd: the MTD device description object
+ *
+ * This function is called every time the MTD device is being put; it closes
+ * the underlying UBI volume when the last reference goes away.
+ */
+static void gluebi_put_device(struct mtd_info *mtd)
+{
+	struct ubi_volume *vol;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	vol->gluebi_refcount -= 1;
+	ubi_assert(vol->gluebi_refcount >= 0);
+	if (vol->gluebi_refcount == 0)
+		ubi_close_volume(vol->gluebi_desc);
+}
+
+/**
+ * gluebi_read - read operation of emulated MTD devices.
+ * @mtd: MTD device description object
+ * @from: absolute offset from where to read
+ * @len: how many bytes to read
+ * @retlen: count of read bytes is returned here
+ * @buf: buffer to store the read data
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len,
+		       size_t *retlen, unsigned char *buf)
+{
+	int err = 0, lnum, offs, total_read;
+	struct ubi_volume *vol;
+	struct ubi_device *ubi;
+	uint64_t tmp = from;
+
+	dbg_msg("read %zd bytes from offset %lld", len, from);
+
+	if (len < 0 || from < 0 || from + len > mtd->size)
+		return -EINVAL;
+
+	vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
+	ubi = vol->ubi;
+
+	offs = do_div(tmp, mtd->erasesize);
+	lnum = tmp;
+
+	total_read = len;
+	while (total_read) {
+		size_t to_read = mtd->erasesize - offs;
+
+		if (to_read > total_read)
+			to_read = total_read;
+
+		err = ubi_eba_read_leb(ubi, vol->vol_id, lnum, buf, offs,
+				       to_read, 0);
+		if (err)
+			break;
+
+		lnum += 1;
+		offs = 0;
+		total_read -= to_read;
+		buf += to_read;
+	}
+
+	*retlen = len - total_read;
+	return err;
+}
+
+/**
+ * gluebi_write - write operation of emulated MTD devices.
+ * @mtd: MTD device description object
+ * @to: absolute offset where to write
+ * @len: how many bytes to write
+ * @retlen: count of written bytes is returned here
+ * @buf: buffer with data to write
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
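+ *
+ * For example (a sketch with made-up numbers): with a 128 KiB eraseblock
+ * size, a write to @to = 300000 is split by the do_div() call below into
+ * lnum = 2 and offs = 37856, i.e. an absolute MTD offset becomes a
+ * (LEB number, offset within the LEB) pair for ubi_eba_write_leb().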
+ */ +static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + int err = 0, lnum, offs, total_written; + struct ubi_volume *vol; + struct ubi_device *ubi; + uint64_t tmp = to; + + dbg_msg("write %zd bytes to offset %lld", len, to); + + if (len < 0 || to < 0 || len + to > mtd->size) + return -EINVAL; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + + if (ubi->ro_mode) + return -EROFS; + + offs = do_div(tmp, mtd->erasesize); + lnum = tmp; + + if (len % mtd->writesize || offs % mtd->writesize) + return -EINVAL; + + total_written = len; + while (total_written) { + size_t to_write = mtd->erasesize - offs; + + if (to_write > total_written) + to_write = total_written; + + err = ubi_eba_write_leb(ubi, vol->vol_id, lnum, buf, offs, + to_write, UBI_UNKNOWN); + if (err) + break; + + lnum += 1; + offs = 0; + total_written -= to_write; + buf += to_write; + } + + *retlen = len - total_written; + return err; +} + +/** + * gluebi_erase - erase operation of emulated MTD devices. + * @mtd: the MTD device description object + * @instr: the erase operation description + * + * This function calls the erase callback when finishes. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + int err, i, lnum, count; + struct ubi_volume *vol; + struct ubi_device *ubi; + + dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); + + if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) + return -EINVAL; + + if (instr->len < 0 || instr->addr + instr->len > mtd->size) + return -EINVAL; + + if (instr->addr % mtd->writesize || instr->len % mtd->writesize) + return -EINVAL; + + lnum = instr->addr / mtd->erasesize; + count = instr->len / mtd->erasesize; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + + if (ubi->ro_mode) + return -EROFS; + + for (i = 0; i < count; i++) { + err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum + i); + if (err) + goto out_err; + } + + /* + * MTD erase operations are synchronous, so we have to make sure the + * physical eraseblock is wiped out. + */ + err = ubi_wl_flush(ubi); + if (err) + goto out_err; + + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); + return 0; + +out_err: + instr->state = MTD_ERASE_FAILED; + instr->fail_addr = lnum * mtd->erasesize; + return err; +} + +/** + * ubi_create_gluebi - initialize gluebi for an UBI volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function is called when an UBI volume is created in order to create + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. 
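+ *
+ * For example (illustrative numbers only): a volume with
+ * usable_leb_size = 126976 bytes and reserved_pebs = 100 shows up as an MTD
+ * device with erasesize = 126976 and size = 12697600, which is enough for a
+ * flash file system such as JFFS2 to run on top of the volume.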
+ */
+int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
+{
+	struct mtd_info *mtd = &vol->gluebi_mtd;
+
+	mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL);
+	if (!mtd->name)
+		return -ENOMEM;
+
+	mtd->type = MTD_UBIVOLUME;
+	if (!ubi->ro_mode)
+		mtd->flags = MTD_WRITEABLE;
+	mtd->writesize = ubi->min_io_size;
+	mtd->owner = THIS_MODULE;
+	mtd->size = vol->usable_leb_size * vol->reserved_pebs;
+	mtd->erasesize = vol->usable_leb_size;
+	mtd->read = gluebi_read;
+	mtd->write = gluebi_write;
+	mtd->erase = gluebi_erase;
+	mtd->get_device = gluebi_get_device;
+	mtd->put_device = gluebi_put_device;
+
+	if (add_mtd_device(mtd)) {
+		ubi_err("cannot add MTD device");
+		kfree(mtd->name);
+		return -ENFILE;
+	}
+
+	dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u",
+		mtd->index, mtd->name, mtd->size, mtd->erasesize);
+	return 0;
+}
+
+/**
+ * ubi_destroy_gluebi - close gluebi for an UBI volume.
+ * @vol: volume description object
+ *
+ * This function is called when an UBI volume is removed in order to remove
+ * the corresponding fake MTD device. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+int ubi_destroy_gluebi(struct ubi_volume *vol)
+{
+	int err;
+	struct mtd_info *mtd = &vol->gluebi_mtd;
+
+	dbg_msg("remove mtd%d", mtd->index);
+	err = del_mtd_device(mtd);
+	if (err)
+		return err;
+	kfree(mtd->name);
+	return 0;
+}
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
new file mode 100644
index 00000000000..438914d0515
--- /dev/null
+++ b/drivers/mtd/ubi/io.c
@@ -0,0 +1,1259 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ * Copyright (c) Nokia Corporation, 2006, 2007
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * UBI input/output unit.
+ *
+ * This unit provides a uniform way to work with all kinds of underlying MTD
+ * devices. It also implements handy functions for reading and writing UBI
+ * headers.
+ *
+ * We are trying to have a paranoid mindset and not to trust what we read
+ * from the flash media in order to be more secure and robust. So this unit
+ * validates every single header it reads from the flash media.
+ *
+ * Some words about how the eraseblock headers are stored.
+ *
+ * The erase counter header is always stored at offset zero. By default, the
+ * VID header is stored after the EC header at the closest aligned offset
+ * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID
+ * header at the closest aligned offset. But this default layout may be
+ * changed. For example, for different reasons (e.g., optimization) UBI may be
+ * asked to put the VID header at a further offset, and even at an unaligned
+ * offset. Of course, if the offset of the VID header is unaligned, UBI adds
+ * proper padding in front of it.
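+ *
+ * Schematically, the default physical eraseblock layout described above is:
+ *
+ *	offset 0:              EC header
+ *	ubi->vid_hdr_offset:   VID header (possibly preceded by padding)
+ *	ubi->leb_start:        LEB data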
+ * Data offset may also be changed but it has to be aligned.
+ *
+ * About minimal I/O units. In general, UBI assumes a flash device model where
+ * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1,
+ * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the
+ * @ubi->mtd->writesize field. But as an exception, UBI admits of using another
+ * (smaller) minimal I/O unit size for EC and VID headers to make it possible
+ * to do different optimizations.
+ *
+ * This is extremely useful in case of NAND flashes which admit of several
+ * write operations to one NAND page. In this case UBI can fit EC and VID
+ * headers in one NAND page. Thus, UBI may use "sub-page" size as the minimal
+ * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still
+ * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI
+ * users.
+ *
+ * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so
+ * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID
+ * headers.
+ *
+ * Q: why not just treat the sub-page as a minimal I/O unit of this flash
+ * device, e.g., make @ubi->min_io_size = 512 in the example above?
+ *
+ * A: because when writing a sub-page, MTD still writes a full 2K page but the
+ * bytes which are not relevant to the sub-page are 0xFF. So, basically,
+ * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page.
+ * Thus, we prefer to use sub-pages only for EC and VID headers.
+ *
+ * As it was noted above, the VID header may start at a non-aligned offset.
+ * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page,
+ * the VID header may reside at offset 1984 which is the last 64 bytes of the
+ * last sub-page (EC header is always at offset zero). This causes some
+ * difficulties when reading and writing VID headers.
+ *
+ * Suppose we have a 64-byte buffer and we read a VID header into it. We change
+ * the data and want to write this VID header out. As we can only write in
+ * 512-byte chunks, we have to allocate one more buffer and copy our VID header
+ * to offset 448 of this buffer.
+ *
+ * The I/O unit does the following trick in order to avoid this extra copy.
+ * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header
+ * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the
+ * VID header is being written out, it shifts the VID header pointer back and
+ * writes the whole sub-page.
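+ *
+ * A minimal sketch of that trick (using the field names of this file):
+ *
+ *	buf = kzalloc(ubi->vid_hdr_alsize, GFP_KERNEL);
+ *	vid_hdr = buf + ubi->vid_hdr_shift;	(what the caller gets)
+ *	... fill and use vid_hdr ...
+ *	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+ *	write ubi->vid_hdr_alsize bytes from p at ubi->vid_hdr_aloffset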
+ */
+
+#include <linux/crc32.h>
+#include <linux/err.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
+				 const struct ubi_ec_hdr *ec_hdr);
+static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum);
+static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+				  const struct ubi_vid_hdr *vid_hdr);
+static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
+				 int offset, int len);
+#else
+#define paranoid_check_not_bad(ubi, pnum) 0
+#define paranoid_check_peb_ec_hdr(ubi, pnum) 0
+#define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0
+#define paranoid_check_peb_vid_hdr(ubi, pnum) 0
+#define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0
+#define paranoid_check_all_ff(ubi, pnum, offset, len) 0
+#endif
+
+/**
+ * ubi_io_read - read data from a physical eraseblock.
+ * @ubi: UBI device description object
+ * @buf: buffer where to store the read data
+ * @pnum: physical eraseblock number to read from
+ * @offset: offset within the physical eraseblock from where to read
+ * @len: how many bytes to read
+ *
+ * This function reads data from offset @offset of physical eraseblock @pnum
+ * and stores the read data in the @buf buffer. The following return codes are
+ * possible:
+ *
+ * o %0 if all the requested data were successfully read;
+ * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but
+ *   correctable bit-flips were detected; this is harmless but may indicate
+ *   that this eraseblock may become bad soon (but does not have to);
+ * o %-EBADMSG if the MTD subsystem reported data integrity problems, for
+ *   example it can be an ECC error in case of NAND; this most probably means
+ *   that the data is corrupted;
+ * o %-EIO if some I/O error occurred;
+ * o other negative error codes in case of other errors.
+ */
+int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
+		int len)
+{
+	int err, retries = 0;
+	size_t read;
+	loff_t addr;
+
+	dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset);
+
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+	ubi_assert(offset >= 0 && offset + len <= ubi->peb_size);
+	ubi_assert(len > 0);
+
+	err = paranoid_check_not_bad(ubi, pnum);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	addr = (loff_t)pnum * ubi->peb_size + offset;
+retry:
+	err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
+	if (err) {
+		if (err == -EUCLEAN) {
+			/*
+			 * -EUCLEAN is reported if there was a bit-flip which
+			 * was corrected, so this is harmless.
+			 */
+			ubi_msg("fixable bit-flip detected at PEB %d", pnum);
+			ubi_assert(len == read);
+			return UBI_IO_BITFLIPS;
+		}
+
+		if (read != len && retries++ < UBI_IO_RETRIES) {
+			dbg_io("error %d while reading %d bytes from PEB %d:%d, "
+			       "read only %zd bytes, retry",
+			       err, len, pnum, offset, read);
+			yield();
+			goto retry;
+		}
+
+		ubi_err("error %d while reading %d bytes from PEB %d:%d, "
+			"read %zd bytes", err, len, pnum, offset, read);
+		ubi_dbg_dump_stack();
+	} else {
+		ubi_assert(len == read);
+
+		if (ubi_dbg_is_bitflip()) {
+			dbg_msg("bit-flip (emulated)");
+			err = UBI_IO_BITFLIPS;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ubi_io_write - write data to a physical eraseblock.
+ * @ubi: UBI device description object + * @buf: buffer with the data to write + * @pnum: physical eraseblock number to write to + * @offset: offset within the physical eraseblock where to write + * @len: how many bytes to write + * + * This function writes @len bytes of data from buffer @buf to offset @offset + * of physical eraseblock @pnum. If all the data were successfully written, + * zero is returned. If an error occurred, this function returns a negative + * error code. If %-EIO is returned, the physical eraseblock most probably went + * bad. + * + * Note, in case of an error, it is possible that something was still written + * to the flash media, but may be some garbage. + */ +int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len) +{ + int err; + size_t written; + loff_t addr; + + dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(offset % ubi->hdrs_min_io_size == 0); + ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + /* The below has to be compiled out if paranoid checks are disabled */ + + err = paranoid_check_not_bad(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + + /* The area we are writing to has to contain all 0xFF bytes */ + err = paranoid_check_all_ff(ubi, pnum, offset, len); + if (err) + return err > 0 ? -EINVAL : err; + + if (offset >= ubi->leb_start) { + /* + * We write to the data area of the physical eraseblock. Make + * sure it has valid EC and VID headers. + */ + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + err = paranoid_check_peb_vid_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + } + + if (ubi_dbg_is_write_failure()) { + dbg_err("cannot write %d bytes to PEB %d:%d " + "(emulated)", len, pnum, offset); + ubi_dbg_dump_stack(); + return -EIO; + } + + addr = (loff_t)pnum * ubi->peb_size + offset; + err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf); + if (err) { + ubi_err("error %d while writing %d bytes to PEB %d:%d, written" + " %zd bytes", err, len, pnum, offset, written); + ubi_dbg_dump_stack(); + } else + ubi_assert(written == len); + + return err; +} + +/** + * erase_callback - MTD erasure call-back. + * @ei: MTD erase information object. + * + * Note, even though MTD erase interface is asynchronous, all the current + * implementations are synchronous anyway. + */ +static void erase_callback(struct erase_info *ei) +{ + wake_up_interruptible((wait_queue_head_t *)ei->priv); +} + +/** + * do_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to erase + * + * This function synchronously erases physical eraseblock @pnum and returns + * zero in case of success and a negative error code in case of failure. If + * %-EIO is returned, the physical eraseblock most probably went bad. 
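+ *
+ * Note on the mechanism (a summary of the code below): the MTD erase API is
+ * asynchronous, so a one-shot wait queue is passed via @ei.priv to
+ * erase_callback() above, and this function sleeps until @ei.state becomes
+ * %MTD_ERASE_DONE or %MTD_ERASE_FAILED.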
+ */
+static int do_sync_erase(const struct ubi_device *ubi, int pnum)
+{
+	int err, retries = 0;
+	struct erase_info ei;
+	wait_queue_head_t wq;
+
+	dbg_io("erase PEB %d", pnum);
+
+retry:
+	init_waitqueue_head(&wq);
+	memset(&ei, 0, sizeof(struct erase_info));
+
+	ei.mtd = ubi->mtd;
+	ei.addr = (loff_t)pnum * ubi->peb_size;
+	ei.len = ubi->peb_size;
+	ei.callback = erase_callback;
+	ei.priv = (unsigned long)&wq;
+
+	err = ubi->mtd->erase(ubi->mtd, &ei);
+	if (err) {
+		if (retries++ < UBI_IO_RETRIES) {
+			dbg_io("error %d while erasing PEB %d, retry",
+			       err, pnum);
+			yield();
+			goto retry;
+		}
+		ubi_err("cannot erase PEB %d, error %d", pnum, err);
+		ubi_dbg_dump_stack();
+		return err;
+	}
+
+	err = wait_event_interruptible(wq, ei.state == MTD_ERASE_DONE ||
+				       ei.state == MTD_ERASE_FAILED);
+	if (err) {
+		ubi_err("interrupted PEB %d erasure", pnum);
+		return -EINTR;
+	}
+
+	if (ei.state == MTD_ERASE_FAILED) {
+		if (retries++ < UBI_IO_RETRIES) {
+			dbg_io("error while erasing PEB %d, retry", pnum);
+			yield();
+			goto retry;
+		}
+		ubi_err("cannot erase PEB %d", pnum);
+		ubi_dbg_dump_stack();
+		return -EIO;
+	}
+
+	err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size);
+	if (err)
+		return err > 0 ? -EINVAL : err;
+
+	if (ubi_dbg_is_erase_failure() && !err) {
+		dbg_err("cannot erase PEB %d (emulated)", pnum);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * check_pattern - check if buffer contains only a certain byte pattern.
+ * @buf: buffer to check
+ * @patt: the pattern to check
+ * @size: buffer size in bytes
+ *
+ * This function returns %1 if there are only @patt bytes in @buf, and %0 if
+ * something else was also found.
+ */
+static int check_pattern(const void *buf, uint8_t patt, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		if (((const uint8_t *)buf)[i] != patt)
+			return 0;
+	return 1;
+}
+
+/* Patterns to write to a physical eraseblock when torturing it */
+static uint8_t patterns[] = {0xa5, 0x5a, 0x0};
+
+/**
+ * torture_peb - test a supposedly bad physical eraseblock.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to test
+ *
+ * This function returns %-EIO if the physical eraseblock did not pass the
+ * test, a positive number of erase operations done if the test was
+ * successfully passed, and other negative error codes in case of other errors.
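+ *
+ * Concretely, the test below is: for each pattern in {0xA5, 0x5A, 0x00},
+ * erase the PEB, check that it reads back as all 0xFF, then write the
+ * pattern over the whole PEB and check that it reads back intact.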
+ */ +static int torture_peb(const struct ubi_device *ubi, int pnum) +{ + void *buf; + int err, i, patt_count; + + buf = kmalloc(ubi->peb_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + patt_count = ARRAY_SIZE(patterns); + ubi_assert(patt_count > 0); + + for (i = 0; i < patt_count; i++) { + err = do_sync_erase(ubi, pnum); + if (err) + goto out; + + /* Make sure the PEB contains only 0xFF bytes */ + err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(buf, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err("erased PEB %d, but a non-0xFF byte found", + pnum); + err = -EIO; + goto out; + } + + /* Write a pattern and check it */ + memset(buf, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + memset(buf, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(buf, patterns[i], ubi->peb_size); + if (err == 0) { + ubi_err("pattern %x checking failed for PEB %d", + patterns[i], pnum); + err = -EIO; + goto out; + } + } + + err = patt_count; + +out: + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* + * If a bit-flip or data integrity error was detected, the test + * has not passed because it happened on a freshly erased + * physical eraseblock which means something is wrong with it. + */ + err = -EIO; + kfree(buf); + return err; +} + +/** + * ubi_io_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to erase + * @torture: if this physical eraseblock has to be tortured + * + * This function synchronously erases physical eraseblock @pnum. If @torture + * flag is not zero, the physical eraseblock is checked by means of writing + * different patterns to it and reading them back. If the torturing is enabled, + * the physical eraseblock is erased more then once. + * + * This function returns the number of erasures made in case of success, %-EIO + * if the erasure failed or the torturing test failed, and other negative error + * codes in case of other errors. Note, %-EIO means that the physical + * eraseblock is bad. + */ +int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture) +{ + int err, ret = 0; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = paranoid_check_not_bad(ubi, pnum); + if (err != 0) + return err > 0 ? -EINVAL : err; + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (torture) { + ret = torture_peb(ubi, pnum); + if (ret < 0) + return ret; + } + + err = do_sync_erase(ubi, pnum); + if (err) + return err; + + return ret + 1; +} + +/** + * ubi_io_is_bad - check if a physical eraseblock is bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns a positive number if the physical eraseblock is bad, + * zero if not, and a negative error code if an error occurred. + */ +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) +{ + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->bad_allowed) { + int ret; + + ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size); + if (ret < 0) + ubi_err("error %d while checking if PEB %d is bad", + ret, pnum); + else if (ret) + dbg_io("PEB %d is bad", pnum); + return ret; + } + + return 0; +} + +/** + * ubi_io_mark_bad - mark a physical eraseblock as bad. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to mark + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (!ubi->bad_allowed) + return 0; + + err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size); + if (err) + ubi_err("cannot mark PEB %d bad, error %d", pnum, err); + return err; +} + +/** + * validate_ec_hdr - validate an erase counter header. + * @ubi: UBI device description object + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header is OK, and %1 if + * not. + */ +static int validate_ec_hdr(const struct ubi_device *ubi, + const struct ubi_ec_hdr *ec_hdr) +{ + long long ec; + int vid_hdr_offset, leb_start; + + ec = ubi64_to_cpu(ec_hdr->ec); + vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = ubi32_to_cpu(ec_hdr->data_offset); + + if (ec_hdr->version != UBI_VERSION) { + ubi_err("node with incompatible UBI version found: " + "this UBI version is %d, image version is %d", + UBI_VERSION, (int)ec_hdr->version); + goto bad; + } + + if (vid_hdr_offset != ubi->vid_hdr_offset) { + ubi_err("bad VID header offset %d, expected %d", + vid_hdr_offset, ubi->vid_hdr_offset); + goto bad; + } + + if (leb_start != ubi->leb_start) { + ubi_err("bad data offset %d, expected %d", + leb_start, ubi->leb_start); + goto bad; + } + + if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) { + ubi_err("bad erase counter %lld", ec); + goto bad; + } + + return 0; + +bad: + ubi_err("bad EC header"); + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * ubi_io_read_ec_hdr - read and check an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to read from + * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter + * header + * @verbose: be verbose if the header is corrupted or was not found + * + * This function reads erase counter header from physical eraseblock @pnum and + * stores it in @ec_hdr. This function also checks CRC checksum of the read + * erase counter header. The following codes may be returned: + * + * o %0 if the CRC checksum is correct and the header was successfully read; + * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected + * and corrected by the flash driver; this is harmless but may indicate that + * this eraseblock may become bad soon (but may be not); + * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error); + * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; + * o a negative error code in case of failure. + */ +int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose) +{ + int err, read_err = 0; + uint32_t crc, magic, hdr_crc; + + dbg_io("read EC header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (err) { + if (err != UBI_IO_BITFLIPS && err != -EBADMSG) + return err; + + /* + * We read all the data, but either a correctable bit-flip + * occurred, or MTD reported about some data integrity error, + * like an ECC error in case of NAND. The former is harmless, + * the later may mean that the read data is corrupted. 
But we + * have a CRC check-sum and we will detect this. If the EC + * header is still OK, we just report this as there was a + * bit-flip. + */ + read_err = err; + } + + magic = ubi32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + /* + * The magic field is wrong. Let's check if we have read all + * 0xFF. If yes, this physical eraseblock is assumed to be + * empty. + * + * But if there was a read error, we do not test it for all + * 0xFFs. Even if it does contain all 0xFFs, this error + * indicates that something is still wrong with this physical + * eraseblock and we anyway cannot treat it as empty. + */ + if (read_err != -EBADMSG && + check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + /* The physical eraseblock is supposedly empty */ + + /* + * The below is just a paranoid check, it has to be + * compiled out if paranoid checks are disabled. + */ + err = paranoid_check_all_ff(ubi, pnum, 0, + ubi->peb_size); + if (err) + return err > 0 ? UBI_IO_BAD_EC_HDR : err; + + if (verbose) + ubi_warn("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); + return UBI_IO_PEB_EMPTY; + } + + /* + * This is not a valid erase counter header, and these are not + * 0xFF bytes. Report that the header is corrupted. + */ + if (verbose) { + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dbg_dump_ec_hdr(ec_hdr); + } + return UBI_IO_BAD_EC_HDR; + } + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + + if (hdr_crc != crc) { + if (verbose) { + ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," + " read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_ec_hdr(ec_hdr); + } + return UBI_IO_BAD_EC_HDR; + } + + /* And of course validate what has just been read from the media */ + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err("validation failed for PEB %d", pnum); + return -EINVAL; + } + + return read_err ? UBI_IO_BITFLIPS : 0; +} + +/** + * ubi_io_write_ec_hdr - write an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to write to + * @ec_hdr: the erase counter header to write + * + * This function writes erase counter header described by @ec_hdr to physical + * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so + * the caller do not have to fill them. Callers must only fill the @ec_hdr->ec + * field. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock most probably + * went bad. + */ +int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t crc; + + dbg_io("write EC header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC); + ec_hdr->version = UBI_VERSION; + ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset); + ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start); + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + ec_hdr->hdr_crc = cpu_to_ubi32(crc); + + err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + if (err) + return -EINVAL; + + err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); + return err; +} + +/** + * validate_vid_hdr - validate a volume identifier header. + * @ubi: UBI device description object + * @vid_hdr: the volume identifier header to check + * + * This function checks that data stored in the volume identifier header + * @vid_hdr. 
Returns zero if the VID header is OK and %1 if not. + */ +static int validate_vid_hdr(const struct ubi_device *ubi, + const struct ubi_vid_hdr *vid_hdr) +{ + int vol_type = vid_hdr->vol_type; + int copy_flag = vid_hdr->copy_flag; + int vol_id = ubi32_to_cpu(vid_hdr->vol_id); + int lnum = ubi32_to_cpu(vid_hdr->lnum); + int compat = vid_hdr->compat; + int data_size = ubi32_to_cpu(vid_hdr->data_size); + int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); + int data_pad = ubi32_to_cpu(vid_hdr->data_pad); + int data_crc = ubi32_to_cpu(vid_hdr->data_crc); + int usable_leb_size = ubi->leb_size - data_pad; + + if (copy_flag != 0 && copy_flag != 1) { + dbg_err("bad copy_flag"); + goto bad; + } + + if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || + data_pad < 0) { + dbg_err("negative values"); + goto bad; + } + + if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { + dbg_err("bad vol_id"); + goto bad; + } + + if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { + dbg_err("bad compat"); + goto bad; + } + + if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && + compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && + compat != UBI_COMPAT_REJECT) { + dbg_err("bad compat"); + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + dbg_err("bad vol_type"); + goto bad; + } + + if (data_pad >= ubi->leb_size / 2) { + dbg_err("bad data_pad"); + goto bad; + } + + if (vol_type == UBI_VID_STATIC) { + /* + * Although from high-level point of view static volumes may + * contain zero bytes of data, but no VID headers can contain + * zero at these fields, because they empty volumes do not have + * mapped logical eraseblocks. + */ + if (used_ebs == 0) { + dbg_err("zero used_ebs"); + goto bad; + } + if (data_size == 0) { + dbg_err("zero data_size"); + goto bad; + } + if (lnum < used_ebs - 1) { + if (data_size != usable_leb_size) { + dbg_err("bad data_size"); + goto bad; + } + } else if (lnum == used_ebs - 1) { + if (data_size == 0) { + dbg_err("bad data_size at last LEB"); + goto bad; + } + } else { + dbg_err("too high lnum"); + goto bad; + } + } else { + if (copy_flag == 0) { + if (data_crc != 0) { + dbg_err("non-zero data CRC"); + goto bad; + } + if (data_size != 0) { + dbg_err("non-zero data_size"); + goto bad; + } + } else { + if (data_size == 0) { + dbg_err("zero data_size of copy"); + goto bad; + } + } + if (used_ebs != 0) { + dbg_err("bad used_ebs"); + goto bad; + } + } + + return 0; + +bad: + ubi_err("bad VID header"); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * ubi_io_read_vid_hdr - read and check a volume identifier header. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to read from + * @vid_hdr: &struct ubi_vid_hdr object where to store the read volume + * identifier header + * @verbose: be verbose if the header is corrupted or wasn't found + * + * This function reads the volume identifier header from physical eraseblock + * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read + * volume identifier header. 
The following codes may be returned:
+ *
+ * o %0 if the CRC checksum is correct and the header was successfully read;
+ * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
+ *   and corrected by the flash driver; this is harmless but may indicate that
+ *   this eraseblock may become bad soon;
+ * o %UBI_IO_BAD_VID_HDR if the volume identifier header is corrupted (a CRC
+ *   error detected);
+ * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID
+ *   header there);
+ * o a negative error code in case of failure.
+ */
+int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum,
+			struct ubi_vid_hdr *vid_hdr, int verbose)
+{
+	int err, read_err = 0;
+	uint32_t crc, magic, hdr_crc;
+	void *p;
+
+	dbg_io("read VID header from PEB %d", pnum);
+	ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
+
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	if (err) {
+		if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
+			return err;
+
+		/*
+		 * We read all the data, but either a correctable bit-flip
+		 * occurred, or MTD reported a data integrity error, like an
+		 * ECC error in case of NAND. The former is harmless, the
+		 * latter may mean the read data is corrupted. But we have a
+		 * CRC check-sum and we will identify this. If the VID header
+		 * is still OK, we just report this as there was a bit-flip.
+		 */
+		read_err = err;
+	}
+
+	magic = ubi32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		/*
+		 * If we have read all 0xFF bytes, the VID header probably does
+		 * not exist and the physical eraseblock is assumed to be free.
+		 *
+		 * But if there was a read error, we do not test the data for
+		 * 0xFFs. Even if it does contain all 0xFFs, this error
+		 * indicates that something is still wrong with this physical
+		 * eraseblock and it cannot be regarded as free.
+		 */
+		if (read_err != -EBADMSG &&
+		    check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
+			/* The physical eraseblock is supposedly free */
+
+			/*
+			 * The below is just a paranoid check, it has to be
+			 * compiled out if paranoid checks are disabled.
+			 */
+			err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start,
+						    ubi->leb_size);
+			if (err)
+				return err > 0 ? UBI_IO_BAD_VID_HDR : err;
+
+			if (verbose)
+				ubi_warn("no VID header found at PEB %d, "
+					 "only 0xFF bytes", pnum);
+			return UBI_IO_PEB_FREE;
+		}
+
+		/*
+		 * This is not a valid VID header, and these are not 0xFF
+		 * bytes. Report that the header is corrupted.
+		 */
+		if (verbose) {
+			ubi_warn("bad magic number at PEB %d: %08x instead of "
+				 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+		}
+		return UBI_IO_BAD_VID_HDR;
+	}
+
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+
+	if (hdr_crc != crc) {
+		if (verbose) {
+			ubi_warn("bad CRC at PEB %d, calculated %#08x, "
+				 "read %#08x", pnum, crc, hdr_crc);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+		}
+		return UBI_IO_BAD_VID_HDR;
+	}
+
+	/* Validate the VID header that we have just read */
+	err = validate_vid_hdr(ubi, vid_hdr);
+	if (err) {
+		ubi_err("validation failed for PEB %d", pnum);
+		return -EINVAL;
+	}
+
+	return read_err ? UBI_IO_BITFLIPS : 0;
+}
+
+/**
+ * ubi_io_write_vid_hdr - write a volume identifier header.
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to write to + * @vid_hdr: the volume identifier header to write + * + * This function writes the volume identifier header described by @vid_hdr to + * physical eraseblock @pnum. This function automatically fills the + * @vid_hdr->magic and the @vid_hdr->version fields, as well as calculates + * header CRC checksum and stores it at vid_hdr->hdr_crc. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock probably went + * bad. + */ +int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr) +{ + int err; + uint32_t crc; + void *p; + + dbg_io("write VID header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL: err; + + vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC); + vid_hdr->version = UBI_VERSION; + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); + vid_hdr->hdr_crc = cpu_to_ubi32(crc); + + err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + if (err) + return -EINVAL; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_not_bad - ensure that a physical eraseblock is not bad. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to check + * + * This function returns zero if the physical eraseblock is good, a positive + * number if it is bad and a negative error code if an error occurred. + */ +static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + + err = ubi_io_is_bad(ubi, pnum); + if (!err) + return err; + + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_stack(); + return err; +} + +/** + * paranoid_check_ec_hdr - check if an erase counter header is all right. + * @ubi: UBI device description object + * @pnum: physical eraseblock number the erase counter header belongs to + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header contains valid + * values, and %1 if not. + */ +static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t magic; + + magic = ubi32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + ubi_err("bad magic %#08x, must be %#08x", + magic, UBI_EC_HDR_MAGIC); + goto fail; + } + + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err("paranoid check failed for PEB %d", pnum); + goto fail; + } + + return 0; + +fail: + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * paranoid_check_peb_ec_hdr - check that the erase counter header of a + * physical eraseblock is in-place and is all right. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the erase counter header is all right, %1 if + * not, and a negative error code if an error occurred. 
+ */
+static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	uint32_t crc, hdr_crc;
+	struct ubi_ec_hdr *ec_hdr;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto exit;
+
+	crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc);
+	if (hdr_crc != crc) {
+		ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_ec_hdr(ec_hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto exit;
+	}
+
+	err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
+
+exit:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_vid_hdr - check that a volume identifier header is all right.
+ * @ubi: UBI device description object
+ * @pnum: physical eraseblock number the volume identifier header belongs to
+ * @vid_hdr: the volume identifier header to check
+ *
+ * This function returns zero if the volume identifier header is all right, and
+ * %1 if not.
+ */
+static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
+				  const struct ubi_vid_hdr *vid_hdr)
+{
+	int err;
+	uint32_t magic;
+
+	magic = ubi32_to_cpu(vid_hdr->magic);
+	if (magic != UBI_VID_HDR_MAGIC) {
+		ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
+			magic, pnum, UBI_VID_HDR_MAGIC);
+		goto fail;
+	}
+
+	err = validate_vid_hdr(ubi, vid_hdr);
+	if (err) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		goto fail;
+	}
+
+	return err;
+
+fail:
+	ubi_err("paranoid check failed for PEB %d", pnum);
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+/**
+ * paranoid_check_peb_vid_hdr - check that the volume identifier header of a
+ * physical eraseblock is in-place and is all right.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ *
+ * This function returns zero if the volume identifier header is all right,
+ * %1 if not, and a negative error code if an error occurred.
+ */
+static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
+{
+	int err;
+	uint32_t crc, hdr_crc;
+	struct ubi_vid_hdr *vid_hdr;
+	void *p;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	p = (char *)vid_hdr - ubi->vid_hdr_shift;
+	err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
+			  ubi->vid_hdr_alsize);
+	if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG)
+		goto exit;
+
+	crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
+	hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+	if (hdr_crc != crc) {
+		ubi_err("bad VID header CRC at PEB %d, calculated %#08x, "
+			"read %#08x", pnum, crc, hdr_crc);
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_dbg_dump_vid_hdr(vid_hdr);
+		ubi_dbg_dump_stack();
+		err = 1;
+		goto exit;
+	}
+
+	err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
+
+exit:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_all_ff - check that a region of flash is empty.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @offset: the starting offset within the physical eraseblock to check
+ * @len: the length of the region to check
+ *
+ * This function returns zero if only 0xFF bytes are present at offset
+ * @offset of the physical eraseblock @pnum, %1 if not, and a negative error
+ * code if an error occurred.
+ */ +static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, + int offset, int len) +{ + size_t read; + int err; + void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + buf = kzalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err && err != -EUCLEAN) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + goto error; + } + + err = check_pattern(buf, 0xFF, len); + if (err == 0) { + ubi_err("flash region at PEB %d:%d, length %d does not " + "contain all 0xFF bytes", pnum, offset, len); + goto fail; + } + + kfree(buf); + return 0; + +fail: + ubi_err("paranoid check failed for PEB %d", pnum); + dbg_msg("hex dump of the %d-%d region", offset, offset + len); + ubi_dbg_hexdump(buf, len); + err = 1; +error: + ubi_dbg_dump_stack(); + kfree(buf); + return err; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c new file mode 100644 index 00000000000..d352c4575c3 --- /dev/null +++ b/drivers/mtd/ubi/kapi.c @@ -0,0 +1,575 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* This file mostly implements UBI kernel API functions */ + +#include <linux/module.h> +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +/** + * ubi_get_device_info - get information about UBI device. + * @ubi_num: UBI device number + * @di: the information is stored here + * + * This function returns %0 in case of success and a %-ENODEV if there is no + * such UBI device. + */ +int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) +{ + const struct ubi_device *ubi; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || + !ubi_devices[ubi_num]) { + module_put(THIS_MODULE); + return -ENODEV; + } + + ubi = ubi_devices[ubi_num]; + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->min_io_size = ubi->min_io_size; + di->ro_mode = ubi->ro_mode; + di->cdev = MKDEV(ubi->major, 0); + module_put(THIS_MODULE); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_get_device_info); + +/** + * ubi_get_volume_info - get information about UBI volume. 
+ * @desc: volume descriptor
+ * @vi: the information is stored here
+ */
+void ubi_get_volume_info(struct ubi_volume_desc *desc,
+			 struct ubi_volume_info *vi)
+{
+	const struct ubi_volume *vol = desc->vol;
+	const struct ubi_device *ubi = vol->ubi;
+
+	vi->vol_id = vol->vol_id;
+	vi->ubi_num = ubi->ubi_num;
+	vi->size = vol->reserved_pebs;
+	vi->used_bytes = vol->used_bytes;
+	vi->vol_type = vol->vol_type;
+	vi->corrupted = vol->corrupted;
+	vi->upd_marker = vol->upd_marker;
+	vi->alignment = vol->alignment;
+	vi->usable_leb_size = vol->usable_leb_size;
+	vi->name_len = vol->name_len;
+	vi->name = vol->name;
+	vi->cdev = MKDEV(ubi->major, vi->vol_id + 1);
+}
+EXPORT_SYMBOL_GPL(ubi_get_volume_info);
+
+/**
+ * ubi_open_volume - open UBI volume.
+ * @ubi_num: UBI device number
+ * @vol_id: volume ID
+ * @mode: open mode
+ *
+ * The @mode parameter specifies if the volume should be opened in read-only
+ * mode, read-write mode, or exclusive mode. The exclusive mode guarantees that
+ * nobody else will be able to open this volume. UBI allows many volume
+ * readers and one writer at a time.
+ *
+ * If a static volume is being opened for the first time since boot, it will be
+ * checked by this function, which means it will be fully read and the CRC
+ * checksum of each logical eraseblock will be checked.
+ *
+ * This function returns a volume descriptor in case of success and a negative
+ * error code in case of failure.
+ */
+struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode)
+{
+	int err;
+	struct ubi_volume_desc *desc;
+	struct ubi_device *ubi;
+	struct ubi_volume *vol;
+
+	dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
+
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(err);
+
+	/* Validate @ubi_num before indexing the ubi_devices array */
+	if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num])
+		goto out_put;
+
+	ubi = ubi_devices[ubi_num];
+
+	err = -EINVAL;
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		goto out_put;
+	if (mode != UBI_READONLY && mode != UBI_READWRITE &&
+	    mode != UBI_EXCLUSIVE)
+		goto out_put;
+
+	desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL);
+	if (!desc) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+
+	spin_lock(&ubi->volumes_lock);
+	vol = ubi->volumes[vol_id];
+	if (!vol) {
+		err = -ENODEV;
+		goto out_unlock;
+	}
+
+	err = -EBUSY;
+	switch (mode) {
+	case UBI_READONLY:
+		if (vol->exclusive)
+			goto out_unlock;
+		vol->readers += 1;
+		break;
+
+	case UBI_READWRITE:
+		if (vol->exclusive || vol->writers > 0)
+			goto out_unlock;
+		vol->writers += 1;
+		break;
+
+	case UBI_EXCLUSIVE:
+		if (vol->exclusive || vol->writers || vol->readers)
+			goto out_unlock;
+		vol->exclusive = 1;
+		break;
+	}
+	spin_unlock(&ubi->volumes_lock);
+
+	desc->vol = vol;
+	desc->mode = mode;
+
+	/*
+	 * To prevent simultaneous checks of the same volume we use @vtbl_mutex,
+	 * although that is not the purpose it was introduced for.
+	 */
+	mutex_lock(&ubi->vtbl_mutex);
+	if (!vol->checked) {
+		/* This is the first open - check the volume */
+		err = ubi_check_volume(ubi, vol_id);
+		if (err < 0) {
+			mutex_unlock(&ubi->vtbl_mutex);
+			ubi_close_volume(desc);
+			return ERR_PTR(err);
+		}
+		if (err == 1) {
+			ubi_warn("volume %d on UBI device %d is corrupted",
+				 vol_id, ubi->ubi_num);
+			vol->corrupted = 1;
+		}
+		vol->checked = 1;
+	}
+	mutex_unlock(&ubi->vtbl_mutex);
+	return desc;
+
+out_unlock:
+	spin_unlock(&ubi->volumes_lock);
+	kfree(desc);
+out_put:
+	module_put(THIS_MODULE);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(ubi_open_volume);
+
+/**
+ * ubi_open_volume_nm - open UBI volume by name.
+ * @ubi_num: UBI device number + * @name: volume name + * @mode: open mode + * + * This function is similar to 'ubi_open_volume()', but opens a volume by name. + */ +struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, + int mode) +{ + int i, vol_id = -1, len; + struct ubi_volume_desc *ret; + struct ubi_device *ubi; + + dbg_msg("open volume %s, mode %d", name, mode); + + if (!name) + return ERR_PTR(-EINVAL); + + len = strnlen(name, UBI_VOL_NAME_MAX + 1); + if (len > UBI_VOL_NAME_MAX) + return ERR_PTR(-EINVAL); + + ret = ERR_PTR(-ENODEV); + if (!try_module_get(THIS_MODULE)) + return ret; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num]) + goto out_put; + + ubi = ubi_devices[ubi_num]; + + spin_lock(&ubi->volumes_lock); + /* Walk all volumes of this UBI device */ + for (i = 0; i < ubi->vtbl_slots; i++) { + struct ubi_volume *vol = ubi->volumes[i]; + + if (vol && len == vol->name_len && !strcmp(name, vol->name)) { + vol_id = i; + break; + } + } + spin_unlock(&ubi->volumes_lock); + + if (vol_id < 0) + goto out_put; + + ret = ubi_open_volume(ubi_num, vol_id, mode); + +out_put: + module_put(THIS_MODULE); + return ret; +} +EXPORT_SYMBOL_GPL(ubi_open_volume_nm); + +/** + * ubi_close_volume - close UBI volume. + * @desc: volume descriptor + */ +void ubi_close_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + + dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); + + spin_lock(&vol->ubi->volumes_lock); + switch (desc->mode) { + case UBI_READONLY: + vol->readers -= 1; + break; + case UBI_READWRITE: + vol->writers -= 1; + break; + case UBI_EXCLUSIVE: + vol->exclusive = 0; + } + spin_unlock(&vol->ubi->volumes_lock); + + kfree(desc); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(ubi_close_volume); + +/** + * ubi_leb_read - read data. + * @desc: volume descriptor + * @lnum: logical eraseblock number to read from + * @buf: buffer where to store the read data + * @offset: offset within the logical eraseblock to read from + * @len: how many bytes to read + * @check: whether UBI has to check the read data's CRC or not. + * + * This function reads data from offset @offset of logical eraseblock @lnum and + * stores the data at @buf. When reading from static volumes, @check specifies + * whether the data has to be checked or not. If yes, the whole logical + * eraseblock will be read and its CRC checksum will be checked (i.e., the CRC + * checksum is per-eraseblock). So checking may substantially slow down the + * read speed. The @check argument is ignored for dynamic volumes. + * + * In case of success, this function returns zero. In case of failure, this + * function returns a negative error code. + * + * %-EBADMSG error code is returned: + * o for both static and dynamic volumes if MTD driver has detected a data + * integrity problem (unrecoverable ECC checksum mismatch in case of NAND); + * o for static volumes in case of data CRC mismatch. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF error code. 
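+ *
+ * A minimal usage sketch (a hypothetical caller; it assumes UBI device 0
+ * contains a volume 0):
+ *
+ *	struct ubi_volume_desc *desc;
+ *	char buf[64];
+ *	int err;
+ *
+ *	desc = ubi_open_volume(0, 0, UBI_READONLY);
+ *	if (IS_ERR(desc))
+ *		return PTR_ERR(desc);
+ *	err = ubi_leb_read(desc, 0, buf, 0, sizeof(buf), 1);
+ *	if (err)
+ *		... handle -EBADF, -EBADMSG, etc. ...
+ *	ubi_close_volume(desc);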
+ */
+int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
+		 int len, int check)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, vol_id = vol->vol_id;
+
+	dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 ||
+	    lnum >= vol->used_ebs || offset < 0 || len < 0 ||
+	    offset + len > vol->usable_leb_size)
+		return -EINVAL;
+
+	if (vol->vol_type == UBI_STATIC_VOLUME && lnum == vol->used_ebs - 1 &&
+	    offset + len > vol->last_eb_bytes)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+	if (len == 0)
+		return 0;
+
+	err = ubi_eba_read_leb(ubi, vol_id, lnum, buf, offset, len, check);
+	if (err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) {
+		ubi_warn("mark volume %d as corrupted", vol_id);
+		vol->corrupted = 1;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(ubi_leb_read);
+
+/**
+ * ubi_leb_write - write data.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to write to
+ * @buf: data to write
+ * @offset: offset within the logical eraseblock where to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function writes @len bytes of data from @buf to offset @offset of
+ * logical eraseblock @lnum. The @dtype argument describes the expected
+ * lifetime of the data.
+ *
+ * This function takes care of physical eraseblock write failures. If the
+ * write to the physical eraseblock fails, the logical eraseblock is
+ * re-mapped to another physical eraseblock, the data is recovered, and the
+ * write finishes. UBI has a pool of reserved physical eraseblocks for this.
+ *
+ * If all the data were successfully written, zero is returned. If an error
+ * occurred and UBI has not been able to recover from it, this function returns
+ * a negative error code. Note, in case of an error, it is possible that
+ * something was still written to the flash media, but that may be some
+ * garbage.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF code.
+ */
+int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		  int offset, int len, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 ||
+	    offset + len > vol->usable_leb_size || offset % ubi->min_io_size ||
+	    len % ubi->min_io_size)
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (len == 0)
+		return 0;
+
+	return ubi_eba_write_leb(ubi, vol_id, lnum, buf, offset, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_write);
+
+/**
+ * ubi_leb_change - change logical eraseblock atomically.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number to change
+ * @buf: data to write
+ * @len: how many bytes to write
+ * @dtype: expected data type
+ *
+ * This function changes the contents of a logical eraseblock atomically. @buf
+ * has to contain new logical eraseblock data, and @len - the length of the
+ * data, which has to be aligned.
+ * The length may be shorter than the logical eraseblock size, and the logical
+ * eraseblock may be appended to again later on. This function guarantees that
+ * in case of an unclean reboot the old contents are preserved. Returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
+		   int len, int dtype)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum);
+
+	if (vol_id < 0 || vol_id >= ubi->vtbl_slots)
+		return -EINVAL;
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 ||
+	    len > vol->usable_leb_size || len % ubi->min_io_size)
+		return -EINVAL;
+
+	if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM &&
+	    dtype != UBI_UNKNOWN)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	if (len == 0)
+		return 0;
+
+	return ubi_eba_atomic_leb_change(ubi, vol_id, lnum, buf, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_change);
+
+/**
+ * ubi_leb_erase - erase logical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function un-maps logical eraseblock @lnum and synchronously erases the
+ * corresponding physical eraseblock. Returns zero in case of success and a
+ * negative error code in case of failure.
+ *
+ * If the volume is damaged because of an interrupted update this function just
+ * returns immediately with %-EBADF code.
+ */
+int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int err, vol_id = vol->vol_id;
+
+	dbg_msg("erase LEB %d:%d", vol_id, lnum);
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	err = ubi_eba_unmap_leb(ubi, vol_id, lnum);
+	if (err)
+		return err;
+
+	return ubi_wl_flush(ubi);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_erase);
+
+/**
+ * ubi_leb_unmap - un-map logical eraseblock.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function un-maps logical eraseblock @lnum and schedules the
+ * corresponding physical eraseblock for erasure, so that it will eventually be
+ * physically erased in the background. This operation is much faster than the
+ * erase operation.
+ *
+ * Unlike erase, the un-map operation does not guarantee that the logical
+ * eraseblock will contain all 0xFF bytes when UBI is initialized again. For
+ * example, if several logical eraseblocks are un-mapped, and an unclean reboot
+ * happens after this, the logical eraseblocks will not necessarily be
+ * un-mapped again when this MTD device is attached. They may actually be
+ * mapped to the same physical eraseblocks again. So, this function has to be
+ * used with care.
+ *
+ * In other words, when un-mapping a logical eraseblock, UBI does not store
+ * any information about this on the flash media, it just marks the logical
+ * eraseblock as "un-mapped" in RAM. If UBI is detached before the physical
+ * eraseblock is physically erased, it will be mapped again to the same logical
+ * eraseblock when the MTD device is attached again.
+ *
+ * The main and obvious use-case of this function is when the contents of a
+ * logical eraseblock have to be re-written.
+ * Then it is much more efficient to first un-map it, then write new data,
+ * rather than first erase it, then write new data. Note, once new data has
+ * been written to the logical eraseblock, UBI guarantees that the old contents
+ * are gone forever. In other words, if an unclean reboot happens after the
+ * logical eraseblock has been un-mapped and then written to, it will contain
+ * the last written data.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. If the volume is damaged because of an interrupted update
+ * this function just returns immediately with %-EBADF code.
+ */
+int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+	struct ubi_device *ubi = vol->ubi;
+	int vol_id = vol->vol_id;
+
+	dbg_msg("unmap LEB %d:%d", vol_id, lnum);
+
+	if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME)
+		return -EROFS;
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	return ubi_eba_unmap_leb(ubi, vol_id, lnum);
+}
+EXPORT_SYMBOL_GPL(ubi_leb_unmap);
+
+/**
+ * ubi_is_mapped - check if logical eraseblock is mapped.
+ * @desc: volume descriptor
+ * @lnum: logical eraseblock number
+ *
+ * This function checks if logical eraseblock @lnum is mapped to a physical
+ * eraseblock. If a logical eraseblock is un-mapped, this does not necessarily
+ * mean it will still be un-mapped after the UBI device is re-attached. The
+ * logical eraseblock may become mapped to the physical eraseblock it was last
+ * mapped to.
+ *
+ * This function returns %1 if the LEB is mapped, %0 if not, and a negative
+ * error code in case of failure. If the volume is damaged because of an
+ * interrupted update this function just returns immediately with %-EBADF error
+ * code.
+ */
+int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum)
+{
+	struct ubi_volume *vol = desc->vol;
+
+	dbg_msg("test LEB %d:%d", vol->vol_id, lnum);
+
+	if (lnum < 0 || lnum >= vol->reserved_pebs)
+		return -EINVAL;
+
+	if (vol->upd_marker)
+		return -EBADF;
+
+	return vol->eba_tbl[lnum] >= 0;
+}
+EXPORT_SYMBOL_GPL(ubi_is_mapped);
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
new file mode 100644
index 00000000000..38d4e6757dc
--- /dev/null
+++ b/drivers/mtd/ubi/misc.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/* Here we keep miscellaneous functions which are used all over the UBI code */
+
+#include "ubi.h"
+
+/**
+ * ubi_calc_data_len - calculate how much real data is stored in a buffer.
+ * @ubi: UBI device description object
+ * @buf: a buffer with the contents of the physical eraseblock
+ * @length: the buffer length
+ *
+ * This function calculates how much "real data" is stored in @buf and returns
+ * the length. Continuous 0xFF bytes at the end of the buffer are not
+ * considered "real data".
+ */
+int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf,
+		      int length)
+{
+	int i;
+
+	ubi_assert(length % ubi->min_io_size == 0);
+
+	for (i = length - 1; i >= 0; i--)
+		if (((const uint8_t *)buf)[i] != 0xFF)
+			break;
+
+	/* The resulting length must be aligned to the minimum flash I/O size */
+	length = ALIGN(i + 1, ubi->min_io_size);
+	return length;
+}
+
+/**
+ * ubi_check_volume - check the contents of a static volume.
+ * @ubi: UBI device description object
+ * @vol_id: ID of the volume to check
+ *
+ * This function checks if static volume @vol_id is corrupted by fully reading
+ * it and checking data CRC. This function returns %0 if the volume is not
+ * corrupted, %1 if it is corrupted and a negative error code in case of
+ * failure. Dynamic volumes are not checked and zero is returned immediately.
+ */
+int ubi_check_volume(struct ubi_device *ubi, int vol_id)
+{
+	void *buf;
+	int err = 0, i;
+	struct ubi_volume *vol = ubi->volumes[vol_id];
+
+	if (vol->vol_type != UBI_STATIC_VOLUME)
+		return 0;
+
+	buf = kmalloc(vol->usable_leb_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < vol->used_ebs; i++) {
+		int size;
+
+		if (i == vol->used_ebs - 1)
+			size = vol->last_eb_bytes;
+		else
+			size = vol->usable_leb_size;
+
+		err = ubi_eba_read_leb(ubi, vol_id, i, buf, 0, size, 1);
+		if (err) {
+			if (err == -EBADMSG)
+				err = 1;
+			break;
+		}
+	}
+
+	kfree(buf);
+	return err;
+}
+
+/**
+ * ubi_calculate_reserved - calculate how many PEBs must be reserved for bad
+ * eraseblock handling.
+ * @ubi: UBI device description object
+ */
+void ubi_calculate_reserved(struct ubi_device *ubi)
+{
+	ubi->beb_rsvd_level = ubi->good_peb_count/100;
+	ubi->beb_rsvd_level *= CONFIG_MTD_UBI_BEB_RESERVE;
+	if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS)
+		ubi->beb_rsvd_level = MIN_RESEVED_PEBS;
+}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
new file mode 100644
index 00000000000..473f3200b86
--- /dev/null
+++ b/drivers/mtd/ubi/scan.c
@@ -0,0 +1,1368 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * UBI scanning unit.
+ *
+ * This unit is responsible for scanning the flash media, checking UBI
+ * headers and providing complete information about the UBI flash image.
+ *
+ * The scanning information is represented by a &struct ubi_scan_info object.
+ * Information about found volumes is represented by &struct ubi_scan_volume
+ * objects which are kept in a volume RB-tree with the root at the @volumes
+ * field. The RB-tree is indexed by the volume ID.
+ *
+ * Found logical eraseblocks are represented by &struct ubi_scan_leb objects.
+ * These objects are kept in per-volume RB-trees with the root at the
+ * corresponding &struct ubi_scan_volume object. To put it differently, we keep
+ * an RB-tree of per-volume objects and each of these objects is the root of an
+ * RB-tree of per-eraseblock objects.
+ *
+ * Corrupted physical eraseblocks are put to the @corr list, free physical
+ * eraseblocks are put to the @free list and the physical eraseblocks to be
+ * erased are put to the @erase list.
+ */
+
+#include <linux/err.h>
+#include <linux/crc32.h>
+#include "ubi.h"
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_si(const struct ubi_device *ubi,
+			     struct ubi_scan_info *si);
+#else
+#define paranoid_check_si(ubi, si) 0
+#endif
+
+/* Temporary variables used during scanning */
+static struct ubi_ec_hdr *ech;
+static struct ubi_vid_hdr *vidh;
+
+/**
+ * ubi_scan_add_to_list - add a physical eraseblock to one of the scan lists.
+ * @si: scanning information
+ * @pnum: physical eraseblock number to add
+ * @ec: erase counter of the physical eraseblock
+ * @list: the list to add to (@si->free, @si->erase, @si->corr or @si->alien)
+ *
+ * This function returns zero in case of success and %-ENOMEM if memory
+ * allocation failed.
+ */
+int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec,
+			 struct list_head *list)
+{
+	struct ubi_scan_leb *seb;
+
+	if (list == &si->free)
+		dbg_bld("add to free: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->erase)
+		dbg_bld("add to erase: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->corr)
+		dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
+	else if (list == &si->alien)
+		dbg_bld("add to alien: PEB %d, EC %d", pnum, ec);
+	else
+		BUG();
+
+	seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
+	if (!seb)
+		return -ENOMEM;
+
+	seb->pnum = pnum;
+	seb->ec = ec;
+	list_add_tail(&seb->u.list, list);
+	return 0;
+}
+
+/**
+ * commit_to_mean_value - commit intermediate results to the final mean erase
+ * counter value.
+ * @si: scanning information
+ *
+ * This is a helper function which calculates a partial mean erase counter
+ * value and adds it to the resulting mean value. As we can work only in
+ * integer arithmetic and we want to calculate the mean value of erase counters
+ * accurately, we first sum erase counter values in the @si->ec_sum variable
+ * and count these components in @si->ec_count. If this temporary @si->ec_sum
+ * is going to overflow, we calculate the partial mean value
+ * (@si->ec_sum/@si->ec_count) and add it to @si->mean_ec.
+ */
+static void commit_to_mean_value(struct ubi_scan_info *si)
+{
+	/* Round to the nearest integer: test the remainder before dividing */
+	if (si->ec_sum % si->ec_count >= si->ec_count / 2)
+		si->mean_ec += 1;
+	si->mean_ec += si->ec_sum / si->ec_count;
+}
+
+/**
+ * validate_vid_hdr - check that volume identifier header is correct and
+ * consistent.
+ * @vid_hdr: the volume identifier header to check
+ * @sv: information about the volume this logical eraseblock belongs to
+ * @pnum: physical eraseblock number the VID header came from
+ *
+ * This function checks that data stored in @vid_hdr is consistent. Returns
+ * non-zero if an inconsistency was found and zero if not.
+ *
+ * Note, UBI does sanity check of everything it reads from the flash media.
+ * Most of the checks are done in the I/O unit. Here we check that the
+ * information in the VID header is consistent with the information in other
+ * VID headers of the same volume.
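+ *
+ * For example, every logical eraseblock of one volume must carry the same
+ * @used_ebs and @data_pad values; a sketch of one such check (the real
+ * checks follow in the function body below):
+ *
+ *	if (ubi32_to_cpu(vid_hdr->used_ebs) != sv->used_ebs)
+ *		... inconsistent image, reject this VID header ...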
+ */
+static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr,
+			    const struct ubi_scan_volume *sv, int pnum)
+{
+	int vol_type = vid_hdr->vol_type;
+	int vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
+	int data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+
+	if (sv->leb_count != 0) {
+		int sv_vol_type;
+
+		/*
+		 * This is not the first logical eraseblock belonging to this
+		 * volume. Ensure that the data in its VID header is consistent
+		 * with the data in previous logical eraseblock headers.
+		 */
+
+		if (vol_id != sv->vol_id) {
+			dbg_err("inconsistent vol_id");
+			goto bad;
+		}
+
+		if (sv->vol_type == UBI_STATIC_VOLUME)
+			sv_vol_type = UBI_VID_STATIC;
+		else
+			sv_vol_type = UBI_VID_DYNAMIC;
+
+		if (vol_type != sv_vol_type) {
+			dbg_err("inconsistent vol_type");
+			goto bad;
+		}
+
+		if (used_ebs != sv->used_ebs) {
+			dbg_err("inconsistent used_ebs");
+			goto bad;
+		}
+
+		if (data_pad != sv->data_pad) {
+			dbg_err("inconsistent data_pad");
+			goto bad;
+		}
+	}
+
+	return 0;
+
+bad:
+	ubi_err("inconsistent VID header at PEB %d", pnum);
+	ubi_dbg_dump_vid_hdr(vid_hdr);
+	ubi_dbg_dump_sv(sv);
+	return -EINVAL;
+}
+
+/**
+ * add_volume - add volume to the scanning information.
+ * @si: scanning information
+ * @vol_id: ID of the volume to add
+ * @pnum: physical eraseblock number
+ * @vid_hdr: volume identifier header
+ *
+ * If the volume corresponding to the @vid_hdr logical eraseblock is already
+ * present in the scanning information, this function does nothing. Otherwise
+ * it adds the corresponding volume to the scanning information. Returns a
+ * pointer to the scanning volume object in case of success and an error
+ * pointer in case of failure.
+ */
+static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id,
+					  int pnum,
+					  const struct ubi_vid_hdr *vid_hdr)
+{
+	struct ubi_scan_volume *sv;
+	struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
+
+	ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id));
+
+	/* Walk the volume RB-tree to look if this volume is already present */
+	while (*p) {
+		parent = *p;
+		sv = rb_entry(parent, struct ubi_scan_volume, rb);
+
+		if (vol_id == sv->vol_id)
+			return sv;
+
+		if (vol_id > sv->vol_id)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	/* The volume is absent - add it */
+	sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL);
+	if (!sv)
+		return ERR_PTR(-ENOMEM);
+
+	sv->highest_lnum = sv->leb_count = 0;
+	sv->vol_id = vol_id;
+	sv->root = RB_ROOT;
+	sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
+	sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+	sv->compat = vid_hdr->compat;
+	sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME
+							    : UBI_STATIC_VOLUME;
+	if (vol_id > si->highest_vol_id)
+		si->highest_vol_id = vol_id;
+
+	rb_link_node(&sv->rb, parent, p);
+	rb_insert_color(&sv->rb, &si->volumes);
+	si->vols_found += 1;
+	dbg_bld("added volume %d", vol_id);
+	return sv;
+}
+
+/**
+ * compare_lebs - find out which logical eraseblock is newer.
+ * @ubi: UBI device description object
+ * @seb: first logical eraseblock to compare
+ * @pnum: physical eraseblock number of the second logical eraseblock to
+ * compare
+ * @vid_hdr: volume identifier header of the second logical eraseblock
+ *
+ * This function compares two copies of a LEB and reports which one is newer.
+ * In case of success this function returns a non-negative value whose bits
+ * encode the result; in case of failure, a negative error code is returned.
+ * The success return codes use the following bits:
+ * o bit 0 is cleared: the first PEB (described by @seb) is newer than the
+ *   second PEB (described by @pnum and @vid_hdr);
+ * o bit 0 is set: the second PEB is newer;
+ * o bit 1 is cleared: no bit-flips were detected in the newer LEB;
+ * o bit 1 is set: bit-flips were detected in the newer LEB;
+ * o bit 2 is cleared: the older LEB is not corrupted;
+ * o bit 2 is set: the older LEB is corrupted.
+ */
+static int compare_lebs(const struct ubi_device *ubi,
+			const struct ubi_scan_leb *seb, int pnum,
+			const struct ubi_vid_hdr *vid_hdr)
+{
+	void *buf;
+	int len, err, second_is_newer, bitflips = 0, corrupted = 0;
+	uint32_t data_crc, crc;
+	struct ubi_vid_hdr *vidh = NULL;
+	unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum);
+
+	if (seb->sqnum == 0 && sqnum2 == 0) {
+		long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver);
+
+		/*
+		 * UBI constantly increases the logical eraseblock version
+		 * number and it can overflow. Thus, we have to bear in mind
+		 * that versions that are close to %0xFFFFFFFF are less than
+		 * versions that are close to %0.
+		 *
+		 * The UBI WL unit guarantees that the number of pending tasks
+		 * is not greater than %0x7FFFFFFF. So, if the difference
+		 * between any two versions is greater than or equal to
+		 * %0x7FFFFFFF, there was an overflow and the logical
+		 * eraseblock with the lower version is actually newer than
+		 * the one with the higher version.
+		 *
+		 * FIXME: but this is anyway obsolete and will be removed at
+		 * some point.
+		 */
+
+		dbg_bld("using old crappy leb_ver stuff");
+
+		abs = v1 - v2;
+		if (abs < 0)
+			abs = -abs;
+
+		if (abs < 0x7FFFFFFF)
+			/* Non-overflow situation */
+			second_is_newer = (v2 > v1);
+		else
+			second_is_newer = (v2 < v1);
+	} else
+		/* Obviously the LEB with lower sequence counter is older */
+		second_is_newer = sqnum2 > seb->sqnum;
+
+	/*
+	 * Now we know which copy is newer. If the copy flag of the PEB with
+	 * newer version is not set, then we just return, otherwise we have to
+	 * check data CRC. For the second PEB we already have the VID header,
+	 * for the first one - we'll need to re-read it from flash.
+	 *
+	 * FIXME: this may be optimized so that we wouldn't read twice.
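+	 *
+	 * For reference, the caller (ubi_scan_add_used()) decodes the result
+	 * of this function roughly as follows (a sketch with illustrative
+	 * variable names):
+	 *
+	 *	cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);
+	 *	second_is_newer = cmp_res & 1;
+	 *	needs_scrubbing = !!(cmp_res & 2);
+	 *	older_is_corrupted = !!(cmp_res & 4);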
+	 */
+
+	if (second_is_newer) {
+		if (!vid_hdr->copy_flag) {
+			/* It is not a copy, so it is newer */
+			dbg_bld("second PEB %d is newer, copy_flag is unset",
+				pnum);
+			return 1;
+		}
+	} else {
+		pnum = seb->pnum;
+
+		vidh = ubi_zalloc_vid_hdr(ubi);
+		if (!vidh)
+			return -ENOMEM;
+
+		err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);
+		if (err) {
+			if (err == UBI_IO_BITFLIPS)
+				bitflips = 1;
+			else {
+				dbg_err("VID header of PEB %d is bad, but it "
+					"was OK earlier", pnum);
+				if (err > 0)
+					err = -EIO;
+
+				goto out_free_vidh;
+			}
+		}
+
+		if (!vidh->copy_flag) {
+			/* It is not a copy, so it is newer */
+			dbg_bld("first PEB %d is newer, copy_flag is unset",
+				pnum);
+			err = bitflips << 1;
+			goto out_free_vidh;
+		}
+
+		vid_hdr = vidh;
+	}
+
+	/* Read the data of the copy and check the CRC */
+
+	len = ubi32_to_cpu(vid_hdr->data_size);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out_free_vidh;
+	}
+
+	err = ubi_io_read_data(ubi, buf, pnum, 0, len);
+	if (err && err != UBI_IO_BITFLIPS)
+		goto out_free_buf;
+
+	data_crc = ubi32_to_cpu(vid_hdr->data_crc);
+	crc = crc32(UBI_CRC32_INIT, buf, len);
+	if (crc != data_crc) {
+		dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
+			pnum, crc, data_crc);
+		corrupted = 1;
+		bitflips = 0;
+		second_is_newer = !second_is_newer;
+	} else {
+		dbg_bld("PEB %d CRC is OK", pnum);
+		bitflips = !!err;
+	}
+
+	kfree(buf);
+	ubi_free_vid_hdr(ubi, vidh);
+
+	if (second_is_newer)
+		dbg_bld("second PEB %d is newer, copy_flag is set", pnum);
+	else
+		dbg_bld("first PEB %d is newer, copy_flag is set", pnum);
+
+	return second_is_newer | (bitflips << 1) | (corrupted << 2);
+
+out_free_buf:
+	kfree(buf);
+out_free_vidh:
+	ubi_free_vid_hdr(ubi, vidh);
+	ubi_assert(err < 0);
+	return err;
+}
+
+/**
+ * ubi_scan_add_used - add information about a physical eraseblock to the
+ * scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: the physical eraseblock number
+ * @ec: erase counter
+ * @vid_hdr: the volume identifier header
+ * @bitflips: if bit-flips were detected when this physical eraseblock was read
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
+		      int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
+		      int bitflips)
+{
+	int err, vol_id, lnum;
+	uint32_t leb_ver;
+	unsigned long long sqnum;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb;
+	struct rb_node **p, *parent = NULL;
+
+	vol_id = ubi32_to_cpu(vid_hdr->vol_id);
+	lnum = ubi32_to_cpu(vid_hdr->lnum);
+	sqnum = ubi64_to_cpu(vid_hdr->sqnum);
+	leb_ver = ubi32_to_cpu(vid_hdr->leb_ver);
+
+	dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
+		pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
+
+	sv = add_volume(si, vol_id, pnum, vid_hdr);
+	if (IS_ERR(sv))
+		return PTR_ERR(sv);
+
+	/*
+	 * Walk the RB-tree of logical eraseblocks of volume @vol_id to look
+	 * if this is the first instance of this logical eraseblock or not.
+	 */
+	p = &sv->root.rb_node;
+	while (*p) {
+		int cmp_res;
+
+		parent = *p;
+		seb = rb_entry(parent, struct ubi_scan_leb, u.rb);
+		if (lnum != seb->lnum) {
+			if (lnum < seb->lnum)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+			continue;
+		}
+
+		/*
+		 * There is already a physical eraseblock describing the same
+		 * logical eraseblock present.
+		 */
+
+		dbg_bld("this LEB already exists: PEB %d, sqnum %llu, "
+			"LEB ver %u, EC %d", seb->pnum, seb->sqnum,
+			seb->leb_ver, seb->ec);
+
+		/*
+		 * Make sure that the logical eraseblocks have different
+		 * versions. Otherwise the image is bad.
+		 */
+		if (seb->leb_ver == leb_ver && leb_ver != 0) {
+			ubi_err("two LEBs with same version %u", leb_ver);
+			ubi_dbg_dump_seb(seb, 0);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+			return -EINVAL;
+		}
+
+		/*
+		 * Make sure that the logical eraseblocks have different
+		 * sequence numbers. Otherwise the image is bad.
+		 *
+		 * FIXME: remove 'sqnum != 0' check when leb_ver is removed.
+		 */
+		if (seb->sqnum == sqnum && sqnum != 0) {
+			ubi_err("two LEBs with same sequence number %llu",
+				sqnum);
+			ubi_dbg_dump_seb(seb, 0);
+			ubi_dbg_dump_vid_hdr(vid_hdr);
+			return -EINVAL;
+		}
+
+		/*
+		 * Now we have to drop the older one and preserve the newer
+		 * one.
+		 */
+		cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr);
+		if (cmp_res < 0)
+			return cmp_res;
+
+		if (cmp_res & 1) {
+			/*
+			 * This logical eraseblock is newer than the one
+			 * found earlier.
+			 */
+			err = validate_vid_hdr(vid_hdr, sv, pnum);
+			if (err)
+				return err;
+
+			if (cmp_res & 4)
+				err = ubi_scan_add_to_list(si, seb->pnum,
+							   seb->ec, &si->corr);
+			else
+				err = ubi_scan_add_to_list(si, seb->pnum,
+							   seb->ec, &si->erase);
+			if (err)
+				return err;
+
+			seb->ec = ec;
+			seb->pnum = pnum;
+			seb->scrub = ((cmp_res & 2) || bitflips);
+			seb->sqnum = sqnum;
+			seb->leb_ver = leb_ver;
+
+			if (sv->highest_lnum == lnum)
+				sv->last_data_size =
+					ubi32_to_cpu(vid_hdr->data_size);
+
+			return 0;
+		} else {
+			/*
+			 * This logical eraseblock is older than the one found
+			 * previously.
+			 */
+			if (cmp_res & 4)
+				return ubi_scan_add_to_list(si, pnum, ec,
+							    &si->corr);
+			else
+				return ubi_scan_add_to_list(si, pnum, ec,
+							    &si->erase);
+		}
+	}
+
+	/*
+	 * We've met this logical eraseblock for the first time, add it to the
+	 * scanning information.
+	 */
+
+	err = validate_vid_hdr(vid_hdr, sv, pnum);
+	if (err)
+		return err;
+
+	seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
+	if (!seb)
+		return -ENOMEM;
+
+	seb->ec = ec;
+	seb->pnum = pnum;
+	seb->lnum = lnum;
+	seb->sqnum = sqnum;
+	seb->scrub = bitflips;
+	seb->leb_ver = leb_ver;
+
+	if (sv->highest_lnum <= lnum) {
+		sv->highest_lnum = lnum;
+		sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size);
+	}
+
+	if (si->max_sqnum < sqnum)
+		si->max_sqnum = sqnum;
+
+	sv->leb_count += 1;
+	rb_link_node(&seb->u.rb, parent, p);
+	rb_insert_color(&seb->u.rb, &sv->root);
+	return 0;
+}
+
+/**
+ * ubi_scan_find_sv - find information about a particular volume in the
+ * scanning information.
+ * @si: scanning information
+ * @vol_id: the requested volume ID
+ *
+ * This function returns a pointer to the volume description or %NULL if there
+ * are no data about this volume in the scanning information.
+ */
+struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si,
+					 int vol_id)
+{
+	struct ubi_scan_volume *sv;
+	struct rb_node *p = si->volumes.rb_node;
+
+	while (p) {
+		sv = rb_entry(p, struct ubi_scan_volume, rb);
+
+		if (vol_id == sv->vol_id)
+			return sv;
+
+		if (vol_id > sv->vol_id)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	return NULL;
+}
+
+/**
+ * ubi_scan_find_seb - find information about a particular logical
+ * eraseblock in the volume scanning information.
+ * @sv: a pointer to the volume scanning information
+ * @lnum: the requested logical eraseblock
+ *
+ * This function returns a pointer to the scanning logical eraseblock or %NULL
+ * if there are no data about it in the scanning volume information.
+ */
+struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv,
+				       int lnum)
+{
+	struct ubi_scan_leb *seb;
+	struct rb_node *p = sv->root.rb_node;
+
+	while (p) {
+		seb = rb_entry(p, struct ubi_scan_leb, u.rb);
+
+		if (lnum == seb->lnum)
+			return seb;
+
+		/*
+		 * Descend in the same order the tree was built by
+		 * ubi_scan_add_used(): smaller @lnum values go left.
+		 */
+		if (lnum < seb->lnum)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	return NULL;
+}
+
+/**
+ * ubi_scan_rm_volume - delete scanning information about a volume.
+ * @si: scanning information
+ * @sv: the volume scanning information to delete
+ */
+void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
+{
+	struct rb_node *rb;
+	struct ubi_scan_leb *seb;
+
+	dbg_bld("remove scanning information about volume %d", sv->vol_id);
+
+	while ((rb = rb_first(&sv->root))) {
+		seb = rb_entry(rb, struct ubi_scan_leb, u.rb);
+		rb_erase(&seb->u.rb, &sv->root);
+		list_add_tail(&seb->u.list, &si->erase);
+	}
+
+	rb_erase(&sv->rb, &si->volumes);
+	kfree(sv);
+	si->vols_found -= 1;
+}
+
+/**
+ * ubi_scan_erase_peb - erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: physical eraseblock number to erase
+ * @ec: erase counter value to write (%UBI_SCAN_UNKNOWN_EC if it is unknown)
+ *
+ * This function erases physical eraseblock @pnum, and writes the erase
+ * counter header to it. This function should only be used during the UBI
+ * device initialization stages, when the EBA unit has not yet been
+ * initialized. This function returns zero in case of success and a negative
+ * error code in case of failure.
+ */
+int ubi_scan_erase_peb(const struct ubi_device *ubi,
+		       const struct ubi_scan_info *si, int pnum, int ec)
+{
+	int err;
+	struct ubi_ec_hdr *ec_hdr;
+
+	if ((long long)ec >= UBI_MAX_ERASECOUNTER) {
+		/*
+		 * Erase counter overflow. Upgrade UBI and use 64-bit
+		 * erase counters internally.
+		 */
+		ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec);
+		return -EINVAL;
+	}
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	ec_hdr->ec = cpu_to_ubi64(ec);
+
+	err = ubi_io_sync_erase(ubi, pnum, 0);
+	if (err < 0)
+		goto out_free;
+
+	err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr);
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * ubi_scan_get_free_peb - get a free physical eraseblock.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function returns a free physical eraseblock. It is supposed to be
+ * called during the UBI initialization stages when the wear-leveling unit is
+ * not initialized yet. This function picks a physical eraseblock from one of
+ * the lists, writes the EC header if it is needed, and removes it from the
+ * list.
+ *
+ * This function returns scanning physical eraseblock information in case of
+ * success and an error code in case of failure.
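+ *
+ * A typical caller checks the result with the usual ERR_PTR conventions
+ * (an illustrative sketch only):
+ *
+ *	seb = ubi_scan_get_free_peb(ubi, si);
+ *	if (IS_ERR(seb))
+ *		return PTR_ERR(seb);
+ *	... write new contents to PEB seb->pnum ...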
+ */
+struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi,
+					   struct ubi_scan_info *si)
+{
+	int err = 0, i;
+	struct ubi_scan_leb *seb;
+
+	if (!list_empty(&si->free)) {
+		seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
+		list_del(&seb->u.list);
+		dbg_bld("return free PEB %d, EC %d", seb->pnum, seb->ec);
+		return seb;
+	}
+
+	for (i = 0; i < 2; i++) {
+		struct list_head *head;
+		struct ubi_scan_leb *tmp_seb;
+
+		if (i == 0)
+			head = &si->erase;
+		else
+			head = &si->corr;
+
+		/*
+		 * We try to erase the first physical eraseblock from the @head
+		 * list and pick it if we succeed, or try to erase the
+		 * next one if not. And so forth. We do not want to deal with
+		 * bad eraseblocks here - they'll be handled later.
+		 */
+		list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
+			if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+				seb->ec = si->mean_ec;
+
+			err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec + 1);
+			if (err)
+				continue;
+
+			seb->ec += 1;
+			list_del(&seb->u.list);
+			dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
+			return seb;
+		}
+	}
+
+	ubi_err("no eraseblocks found");
+	return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * process_eb - read UBI headers, check them and add corresponding data
+ * to the scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ * @pnum: the physical eraseblock number
+ *
+ * This function returns zero if the physical eraseblock was successfully
+ * handled and a negative error code in case of failure.
+ */
+static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
+{
+	long long ec;
+	int err, bitflips = 0, vol_id, ec_corr = 0;
+
+	dbg_bld("scan PEB %d", pnum);
+
+	/* Skip bad physical eraseblocks */
+	err = ubi_io_is_bad(ubi, pnum);
+	if (err < 0)
+		return err;
+	else if (err) {
+		/*
+		 * FIXME: this is actually the duty of the I/O unit to
+		 * initialize this, but MTD does not provide enough
+		 * information.
+		 */
+		si->bad_peb_count += 1;
+		return 0;
+	}
+
+	err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
+	if (err < 0)
+		return err;
+	else if (err == UBI_IO_BITFLIPS)
+		bitflips = 1;
+	else if (err == UBI_IO_PEB_EMPTY)
+		return ubi_scan_add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC,
+					    &si->erase);
+	else if (err == UBI_IO_BAD_EC_HDR) {
+		/*
+		 * We have to also look at the VID header, possibly it is not
+		 * corrupted. Set the %bitflips flag so that this PEB will be
+		 * moved and its EC header re-created.
+		 */
+		ec_corr = 1;
+		ec = UBI_SCAN_UNKNOWN_EC;
+		bitflips = 1;
+	}
+
+	si->is_empty = 0;
+
+	if (!ec_corr) {
+		/* Make sure UBI version is OK */
+		if (ech->version != UBI_VERSION) {
+			ubi_err("this UBI version is %d, image version is %d",
+				UBI_VERSION, (int)ech->version);
+			return -EINVAL;
+		}
+
+		ec = ubi64_to_cpu(ech->ec);
+		if (ec > UBI_MAX_ERASECOUNTER) {
+			/*
+			 * Erase counter overflow. The EC headers have 64 bits
+			 * reserved, but we anyway make use of only 31 bit
+			 * values, as this seems to be enough for any existing
+			 * flash. Upgrade UBI and use 64-bit erase counters
+			 * internally.
+ */ + ubi_err("erase counter overflow, max is %d", + UBI_MAX_ERASECOUNTER); + ubi_dbg_dump_ec_hdr(ech); + return -EINVAL; + } + } + + /* OK, we've done with the EC header, let's look at the VID header */ + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; + else if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else if (err == UBI_IO_BAD_VID_HDR || + (err == UBI_IO_PEB_FREE && ec_corr)) { + /* VID header is corrupted */ + err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + goto adjust_mean_ec; + } else if (err == UBI_IO_PEB_FREE) { + /* No VID header - the physical eraseblock is free */ + err = ubi_scan_add_to_list(si, pnum, ec, &si->free); + if (err) + return err; + goto adjust_mean_ec; + } + + vol_id = ubi32_to_cpu(vidh->vol_id); + if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { + int lnum = ubi32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + ubi_msg("\"delete\" compatible internal volume %d:%d" + " found, remove it", vol_id, lnum); + err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + break; + + case UBI_COMPAT_RO: + ubi_msg("read-only compatible internal volume %d:%d" + " found, switch to read-only mode", + vol_id, lnum); + ubi->ro_mode = 1; + break; + + case UBI_COMPAT_PRESERVE: + ubi_msg("\"preserve\" compatible internal volume %d:%d" + " found", vol_id, lnum); + err = ubi_scan_add_to_list(si, pnum, ec, &si->alien); + if (err) + return err; + si->alien_peb_count += 1; + return 0; + + case UBI_COMPAT_REJECT: + ubi_err("incompatible internal volume %d:%d found", + vol_id, lnum); + return -EINVAL; + } + } + + /* Both UBI headers seem to be fine */ + err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); + if (err) + return err; + +adjust_mean_ec: + if (!ec_corr) { + if (si->ec_sum + ec < ec) { + commit_to_mean_value(si); + si->ec_sum = 0; + si->ec_count = 0; + } else { + si->ec_sum += ec; + si->ec_count += 1; + } + + if (ec > si->max_ec) + si->max_ec = ec; + if (ec < si->min_ec) + si->min_ec = ec; + } + + return 0; +} + +/** + * ubi_scan - scan an MTD device. + * @ubi: UBI device description object + * + * This function does full scanning of an MTD device and returns complete + * information about it. In case of failure, an error code is returned. + */ +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) +{ + int err, pnum; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; + struct ubi_scan_info *si; + + si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL); + if (!si) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&si->corr); + INIT_LIST_HEAD(&si->free); + INIT_LIST_HEAD(&si->erase); + INIT_LIST_HEAD(&si->alien); + si->volumes = RB_ROOT; + si->is_empty = 1; + + err = -ENOMEM; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out_si; + + vidh = ubi_zalloc_vid_hdr(ubi); + if (!vidh) + goto out_ech; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + cond_resched(); + + dbg_msg("process PEB %d", pnum); + err = process_eb(ubi, si, pnum); + if (err < 0) + goto out_vidh; + } + + dbg_msg("scanning is finished"); + + /* Finish mean erase counter calculations */ + if (si->ec_count) + commit_to_mean_value(si); + + if (si->is_empty) + ubi_msg("empty MTD device detected"); + + /* + * In case of unknown erase counter we use the mean erase counter + * value. 
+ */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->free, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->corr, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + list_for_each_entry(seb, &si->erase, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = paranoid_check_si(ubi, si); + if (err) { + if (err > 0) + err = -EINVAL; + goto out_vidh; + } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + return si; + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); +out_si: + ubi_scan_destroy_si(si); + return ERR_PTR(err); +} + +/** + * destroy_sv - free the scanning volume information + * @sv: scanning volume information + * + * This function destroys the volume RB-tree (@sv->root) and the scanning + * volume information. + */ +static void destroy_sv(struct ubi_scan_volume *sv) +{ + struct ubi_scan_leb *seb; + struct rb_node *this = sv->root.rb_node; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + seb = rb_entry(this, struct ubi_scan_leb, u.rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &seb->u.rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + + kfree(seb); + } + } + kfree(sv); +} + +/** + * ubi_scan_destroy_si - destroy scanning information. + * @si: scanning information + */ +void ubi_scan_destroy_si(struct ubi_scan_info *si) +{ + struct ubi_scan_leb *seb, *seb_tmp; + struct ubi_scan_volume *sv; + struct rb_node *rb; + + list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + + /* Destroy the volume RB-tree */ + rb = si->volumes.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + sv = rb_entry(rb, struct ubi_scan_volume, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &sv->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + destroy_sv(sv); + } + } + + kfree(si); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_si - check if the scanning information is correct and + * consistent. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero if the scanning information is all right, %1 if + * not and a negative error code if an error occurred. + */ +static int paranoid_check_si(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int pnum, err, vols_found = 0; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *last_seb; + uint8_t *buf; + + /* + * At first, check that scanning information is ok. 
+ */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + int leb_count = 0; + + cond_resched(); + + vols_found += 1; + + if (si->is_empty) { + ubi_err("bad is_empty flag"); + goto bad_sv; + } + + if (sv->vol_id < 0 || sv->highest_lnum < 0 || + sv->leb_count < 0 || sv->vol_type < 0 || sv->used_ebs < 0 || + sv->data_pad < 0 || sv->last_data_size < 0) { + ubi_err("negative values"); + goto bad_sv; + } + + if (sv->vol_id >= UBI_MAX_VOLUMES && + sv->vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("bad vol_id"); + goto bad_sv; + } + + if (sv->vol_id > si->highest_vol_id) { + ubi_err("highest_vol_id is %d, but vol_id %d is there", + si->highest_vol_id, sv->vol_id); + goto out; + } + + if (sv->vol_type != UBI_DYNAMIC_VOLUME && + sv->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto bad_sv; + } + + if (sv->data_pad > ubi->leb_size / 2) { + ubi_err("bad data_pad"); + goto bad_sv; + } + + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + last_seb = seb; + leb_count += 1; + + if (seb->pnum < 0 || seb->ec < 0) { + ubi_err("negative values"); + goto bad_seb; + } + + if (seb->ec < si->min_ec) { + ubi_err("bad si->min_ec (%d), %d found", + si->min_ec, seb->ec); + goto bad_seb; + } + + if (seb->ec > si->max_ec) { + ubi_err("bad si->max_ec (%d), %d found", + si->max_ec, seb->ec); + goto bad_seb; + } + + if (seb->pnum >= ubi->peb_count) { + ubi_err("too high PEB number %d, total PEBs %d", + seb->pnum, ubi->peb_count); + goto bad_seb; + } + + if (sv->vol_type == UBI_STATIC_VOLUME) { + if (seb->lnum >= sv->used_ebs) { + ubi_err("bad lnum or used_ebs"); + goto bad_seb; + } + } else { + if (sv->used_ebs != 0) { + ubi_err("non-zero used_ebs"); + goto bad_seb; + } + } + + if (seb->lnum > sv->highest_lnum) { + ubi_err("incorrect highest_lnum or lnum"); + goto bad_seb; + } + } + + if (sv->leb_count != leb_count) { + ubi_err("bad leb_count, %d objects in the tree", + leb_count); + goto bad_sv; + } + + if (!last_seb) + continue; + + seb = last_seb; + + if (seb->lnum != sv->highest_lnum) { + ubi_err("bad highest_lnum"); + goto bad_seb; + } + } + + if (vols_found != si->vols_found) { + ubi_err("bad si->vols_found %d, should be %d", + si->vols_found, vols_found); + goto out; + } + + /* Check that scanning information is correct */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + int vol_type; + + cond_resched(); + + last_seb = seb; + + err = ubi_io_read_vid_hdr(ubi, seb->pnum, vidh, 1); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("VID header is not OK (%d)", err); + if (err > 0) + err = -EIO; + return err; + } + + vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? 
+					UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
+			if (sv->vol_type != vol_type) {
+				ubi_err("bad vol_type");
+				goto bad_vid_hdr;
+			}
+
+			if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) {
+				ubi_err("bad sqnum %llu", seb->sqnum);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) {
+				ubi_err("bad vol_id %d", sv->vol_id);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->compat != vidh->compat) {
+				ubi_err("bad compat %d", vidh->compat);
+				goto bad_vid_hdr;
+			}
+
+			if (seb->lnum != ubi32_to_cpu(vidh->lnum)) {
+				ubi_err("bad lnum %d", seb->lnum);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->used_ebs != ubi32_to_cpu(vidh->used_ebs)) {
+				ubi_err("bad used_ebs %d", sv->used_ebs);
+				goto bad_vid_hdr;
+			}
+
+			if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) {
+				ubi_err("bad data_pad %d", sv->data_pad);
+				goto bad_vid_hdr;
+			}
+
+			if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) {
+				ubi_err("bad leb_ver %u", seb->leb_ver);
+				goto bad_vid_hdr;
+			}
+		}
+
+		if (!last_seb)
+			continue;
+
+		if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) {
+			ubi_err("bad highest_lnum %d", sv->highest_lnum);
+			goto bad_vid_hdr;
+		}
+
+		if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) {
+			ubi_err("bad last_data_size %d", sv->last_data_size);
+			goto bad_vid_hdr;
+		}
+	}
+
+	/*
+	 * Make sure that all the physical eraseblocks are in one of the lists
+	 * or trees.
+	 */
+	buf = kmalloc(ubi->peb_count, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	memset(buf, 1, ubi->peb_count);
+	for (pnum = 0; pnum < ubi->peb_count; pnum++) {
+		err = ubi_io_is_bad(ubi, pnum);
+		if (err < 0) {
+			kfree(buf);
+			return err;
+		} else if (err)
+			buf[pnum] = 0;
+	}
+
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb)
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)
+			buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->free, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->corr, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->erase, u.list)
+		buf[seb->pnum] = 0;
+
+	list_for_each_entry(seb, &si->alien, u.list)
+		buf[seb->pnum] = 0;
+
+	err = 0;
+	for (pnum = 0; pnum < ubi->peb_count; pnum++)
+		if (buf[pnum]) {
+			ubi_err("PEB %d is not referenced", pnum);
+			err = 1;
+		}
+
+	kfree(buf);
+	if (err)
+		goto out;
+	return 0;
+
+bad_seb:
+	ubi_err("bad scanning information about LEB %d", seb->lnum);
+	ubi_dbg_dump_seb(seb, 0);
+	ubi_dbg_dump_sv(sv);
+	goto out;
+
+bad_sv:
+	ubi_err("bad scanning information about volume %d", sv->vol_id);
+	ubi_dbg_dump_sv(sv);
+	goto out;
+
+bad_vid_hdr:
+	ubi_err("bad scanning information about volume %d", sv->vol_id);
+	ubi_dbg_dump_sv(sv);
+	ubi_dbg_dump_vid_hdr(vidh);
+
+out:
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
new file mode 100644
index 00000000000..3949f6192c7
--- /dev/null
+++ b/drivers/mtd/ubi/scan.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Artem Bityutskiy (Битюцкий Артём)
+ */
+
+#ifndef __UBI_SCAN_H__
+#define __UBI_SCAN_H__
+
+/* The erase counter value for this physical eraseblock is unknown */
+#define UBI_SCAN_UNKNOWN_EC (-1)
+
+/**
+ * struct ubi_scan_leb - scanning information about a physical eraseblock.
+ * @ec: erase counter (%UBI_SCAN_UNKNOWN_EC if it is unknown)
+ * @pnum: physical eraseblock number
+ * @lnum: logical eraseblock number
+ * @scrub: if this physical eraseblock needs scrubbing
+ * @sqnum: sequence number
+ * @u: union of the RB-tree and list links
+ * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
+ * @u.list: link in one of the eraseblock lists
+ * @leb_ver: logical eraseblock version (obsolete)
+ *
+ * One object of this type is allocated for each physical eraseblock during
+ * scanning.
+ */
+struct ubi_scan_leb {
+	int ec;
+	int pnum;
+	int lnum;
+	int scrub;
+	unsigned long long sqnum;
+	union {
+		struct rb_node rb;
+		struct list_head list;
+	} u;
+	uint32_t leb_ver;
+};
+
+/**
+ * struct ubi_scan_volume - scanning information about a volume.
+ * @vol_id: volume ID
+ * @highest_lnum: highest logical eraseblock number in this volume
+ * @leb_count: number of logical eraseblocks in this volume
+ * @vol_type: volume type
+ * @used_ebs: number of used logical eraseblocks in this volume (only for
+ * static volumes)
+ * @last_data_size: amount of data in the last logical eraseblock of this
+ * volume (always equivalent to the usable logical eraseblock size in case of
+ * dynamic volumes)
+ * @data_pad: how many bytes at the end of logical eraseblocks of this volume
+ * are not used (due to volume alignment)
+ * @compat: compatibility flags of this volume
+ * @rb: link in the volume RB-tree
+ * @root: root of the RB-tree containing all the eraseblocks belonging to this
+ * volume (&struct ubi_scan_leb objects)
+ *
+ * One object of this type is allocated for each volume during scanning.
+ */
+struct ubi_scan_volume {
+	int vol_id;
+	int highest_lnum;
+	int leb_count;
+	int vol_type;
+	int used_ebs;
+	int last_data_size;
+	int data_pad;
+	int compat;
+	struct rb_node rb;
+	struct rb_root root;
+};
+
+/**
+ * struct ubi_scan_info - UBI scanning information.
+ * @volumes: root of the volume RB-tree
+ * @corr: list of corrupted physical eraseblocks
+ * @free: list of free physical eraseblocks
+ * @erase: list of physical eraseblocks which have to be erased
+ * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
+ * those belonging to "preserve"-compatible internal volumes)
+ * @bad_peb_count: count of bad physical eraseblocks
+ * @vols_found: number of volumes found during scanning
+ * @highest_vol_id: highest volume ID
+ * @alien_peb_count: count of physical eraseblocks in the @alien list
+ * @is_empty: flag indicating whether the MTD device is empty or not
+ * @min_ec: lowest erase counter value
+ * @max_ec: highest erase counter value
+ * @max_sqnum: highest sequence number value
+ * @mean_ec: mean erase counter value
+ * @ec_sum: a temporary variable used when calculating @mean_ec
+ * @ec_count: a temporary variable used when calculating @mean_ec
+ *
+ * This data structure contains the result of scanning and may be used by
+ * other UBI units to build final UBI data structures, to perform error
+ * recovery, and so on.
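+ *
+ * Other units typically walk these trees with the 'ubi_rb_for_each_entry()'
+ * helper defined in ubi.h. A minimal sketch of visiting every scanned
+ * eraseblock of every volume:
+ *
+ *	struct ubi_scan_volume *sv;
+ *	struct ubi_scan_leb *seb;
+ *	struct rb_node *rb1, *rb2;
+ *
+ *	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb)
+ *		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb)
+ *			dbg_msg("volume %d: LEB %d is in PEB %d",
+ *				sv->vol_id, seb->lnum, seb->pnum);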
+ */ +struct ubi_scan_info { + struct rb_root volumes; + struct list_head corr; + struct list_head free; + struct list_head erase; + struct list_head alien; + int bad_peb_count; + int vols_found; + int highest_vol_id; + int alien_peb_count; + int is_empty; + int min_ec; + int max_ec; + unsigned long long max_sqnum; + int mean_ec; + int ec_sum; + int ec_count; +}; + +struct ubi_device; +struct ubi_vid_hdr; + +/* + * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a + * list. + * + * @sv: volume scanning information + * @seb: scanning eraseblock infprmation + * @list: the list to move to + */ +static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, + struct ubi_scan_leb *seb, + struct list_head *list) +{ + rb_erase(&seb->u.rb, &sv->root); + list_add_tail(&seb->u.list, list); +} + +int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec, + struct list_head *list); +int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, + int bitflips); +struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, + int vol_id); +struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, + int lnum); +void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv); +struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi, + struct ubi_scan_info *si); +int ubi_scan_erase_peb(const struct ubi_device *ubi, + const struct ubi_scan_info *si, int pnum, int ec); +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi); +void ubi_scan_destroy_si(struct ubi_scan_info *si); + +#endif /* !__UBI_SCAN_H__ */ diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h new file mode 100644 index 00000000000..feb647f108f --- /dev/null +++ b/drivers/mtd/ubi/ubi.h @@ -0,0 +1,535 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_UBI_H__ +#define __UBI_UBI_H__ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/string.h> +#include <linux/mtd/mtd.h> + +#include <mtd/ubi-header.h> +#include <linux/mtd/ubi.h> + +#include "scan.h" +#include "debug.h" + +/* Maximum number of supported UBI devices */ +#define UBI_MAX_DEVICES 32 + +/* UBI name used for character devices, sysfs, etc */ +#define UBI_NAME_STR "ubi" + +/* Normal UBI messages */ +#define ubi_msg(fmt, ...) 
printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__) +/* UBI warning messages */ +#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \ + __FUNCTION__, ##__VA_ARGS__) +/* UBI error messages */ +#define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \ + __FUNCTION__, ##__VA_ARGS__) + +/* Lowest number PEBs reserved for bad PEB handling */ +#define MIN_RESEVED_PEBS 2 + +/* Background thread name pattern */ +#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd" + +/* This marker in the EBA table means that the LEB is um-mapped */ +#define UBI_LEB_UNMAPPED -1 + +/* + * In case of errors, UBI tries to repeat the operation several times before + * returning error. The below constant defines how many times UBI re-tries. + */ +#define UBI_IO_RETRIES 3 + +/* + * Error codes returned by the I/O unit. + * + * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only + * 0xFF bytes + * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a + * valid erase counter header, and the rest are %0xFF bytes + * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) + * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or + * CRC) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected + */ +enum { + UBI_IO_PEB_EMPTY = 1, + UBI_IO_PEB_FREE, + UBI_IO_BAD_EC_HDR, + UBI_IO_BAD_VID_HDR, + UBI_IO_BITFLIPS +}; + +extern int ubi_devices_cnt; +extern struct ubi_device *ubi_devices[]; + +struct ubi_volume_desc; + +/** + * struct ubi_volume - UBI volume description data structure. + * @dev: device object to make use of the the Linux device model + * @cdev: character device object to create character device + * @ubi: reference to the UBI device description object + * @vol_id: volume ID + * @readers: number of users holding this volume in read-only mode + * @writers: number of users holding this volume in read-write mode + * @exclusive: whether somebody holds this volume in exclusive mode + * @removed: if the volume was removed + * @checked: if this static volume was checked + * + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @usable_leb_size: logical eraseblock size without padding + * @used_ebs: how many logical eraseblocks in this volume contain data + * @last_eb_bytes: how many bytes are stored in the last logical eraseblock + * @used_bytes: how many bytes of data this volume contains + * @upd_marker: non-zero if the update marker is set for this volume + * @corrupted: non-zero if the volume is corrupted (static volumes only) + * @alignment: volume alignment + * @data_pad: how many bytes are not used at the end of physical eraseblocks to + * satisfy the requested alignment + * @name_len: volume name length + * @name: volume name + * + * @updating: whether the volume is being updated + * @upd_ebs: how many eraseblocks are expected to be updated + * @upd_bytes: how many bytes are expected to be received + * @upd_received: how many update bytes were already received + * @upd_buf: update buffer which is used to collect update data + * + * @eba_tbl: EBA table of this volume (LEB->PEB mapping) + * + * @gluebi_desc: gluebi UBI volume descriptor + * @gluebi_refcount: reference count of the gluebi MTD device + * @gluebi_mtd: MTD device description object of the gluebi MTD device + * + * The @corrupted field indicates that the volume's contents is corrupted. 
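+ * For example, a read from a static volume whose @corrupted flag is set can
+ * simply be refused. A guard of roughly the following shape (a sketch, not
+ * code from this hunk) is what the read path is assumed to perform:
+ *
+ *	if (vol->vol_type == UBI_STATIC_VOLUME && vol->corrupted)
+ *		return -EBADF;
+ *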
+ * Since UBI protects only static volumes, this field is not relevant to + * dynamic volumes - it is user's responsibility to assure their data + * integrity. + * + * The @upd_marker flag indicates that this volume is either being updated at + * the moment or is damaged because of an unclean reboot. + */ +struct ubi_volume { + struct device dev; + struct cdev cdev; + struct ubi_device *ubi; + int vol_id; + int readers; + int writers; + int exclusive; + int removed; + int checked; + + int reserved_pebs; + int vol_type; + int usable_leb_size; + int used_ebs; + int last_eb_bytes; + long long used_bytes; + int upd_marker; + int corrupted; + int alignment; + int data_pad; + int name_len; + char name[UBI_VOL_NAME_MAX+1]; + + int updating; + int upd_ebs; + long long upd_bytes; + long long upd_received; + void *upd_buf; + + int *eba_tbl; + +#ifdef CONFIG_MTD_UBI_GLUEBI + /* Gluebi-related stuff may be compiled out */ + struct ubi_volume_desc *gluebi_desc; + int gluebi_refcount; + struct mtd_info gluebi_mtd; +#endif +}; + +/** + * struct ubi_volume_desc - descriptor of the UBI volume returned when it is + * opened. + * @vol: reference to the corresponding volume description object + * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) + */ +struct ubi_volume_desc { + struct ubi_volume *vol; + int mode; +}; + +struct ubi_wl_entry; + +/** + * struct ubi_device - UBI device description structure + * @dev: class device object to use the the Linux device model + * @cdev: character device object to create character device + * @ubi_num: UBI device number + * @ubi_name: UBI device name + * @major: character device major number + * @vol_count: number of volumes in this UBI device + * @volumes: volumes of this UBI device + * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, + * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, @vol->readers, + * @vol->writers, @vol->exclusive, @vol->removed, @vol->mapping and + * @vol->eba_tbl. 
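+ *
+ * A typical access pattern under @volumes_lock (compare the sysfs code in
+ * vmt.c later in this patch) looks like this (sketch):
+ *
+ *	spin_lock(&ubi->volumes_lock);
+ *	vol = ubi->volumes[vol_id];
+ *	if (!vol || vol->removed) {
+ *		spin_unlock(&ubi->volumes_lock);
+ *		return -ENODEV;
+ *	}
+ *	... read or update the fields listed above ...
+ *	spin_unlock(&ubi->volumes_lock);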
+ * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks + * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB + * handling + * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling + * + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value + * + * global_sqnum: global sequence number + * @ltree_lock: protects the lock tree and @global_sqnum + * @ltree: the lock tree + * @vtbl_mutex: protects on-flash volume table + * + * @used: RB-tree of used physical eraseblocks + * @free: RB-tree of free physical eraseblocks + * @scrub: RB-tree of physical eraseblocks which need scrubbing + * @prot: protection trees + * @prot.pnum: protection tree indexed by physical eraseblock numbers + * @prot.aec: protection tree indexed by absolute erase counter value + * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, + * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works + * fields + * @wl_scheduled: non-zero if the wear-leveling was scheduled + * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any + * physical eraseblock + * @abs_ec: absolute erase counter + * @move_from: physical eraseblock from where the data is being moved + * @move_to: physical eraseblock where the data is being moved to + * @move_from_put: if the "from" PEB was put + * @move_to_put: if the "to" PEB was put + * @works: list of pending works + * @works_count: count of pending works + * @bgt_thread: background thread description object + * @thread_enabled: if the background thread is enabled + * @bgt_name: background thread name + * + * @flash_size: underlying MTD device size (in bytes) + * @peb_count: count of physical eraseblocks on the MTD device + * @peb_size: physical eraseblock size + * @bad_peb_count: count of bad physical eraseblocks + * @good_peb_count: count of good physical eraseblocks + * @min_io_size: minimal input/output unit size of the underlying MTD device + * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers + * @ro_mode: if the UBI device is in read-only mode + * @leb_size: logical eraseblock size + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks + * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size + * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size + * @vid_hdr_offset: starting offset of the volume identifier header (might be + * unaligned) + * @vid_hdr_aloffset: starting offset of the VID header aligned to + * @hdrs_min_io_size + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or + * not + * @mtd: MTD device descriptor + */ +struct ubi_device { + struct cdev cdev; + struct device dev; + int ubi_num; + char ubi_name[sizeof(UBI_NAME_STR)+5]; + int major; + int vol_count; + struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; + spinlock_t volumes_lock; + + int rsvd_pebs; + int avail_pebs; + int beb_rsvd_pebs; + int beb_rsvd_level; + + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; + struct mutex vtbl_mutex; + + int max_ec; + int mean_ec; + + /* EBA unit's stuff */ + unsigned long long global_sqnum; + spinlock_t ltree_lock; + struct rb_root ltree; + + /* Wear-leveling 
unit's stuff */ + struct rb_root used; + struct rb_root free; + struct rb_root scrub; + struct { + struct rb_root pnum; + struct rb_root aec; + } prot; + spinlock_t wl_lock; + int wl_scheduled; + struct ubi_wl_entry **lookuptbl; + unsigned long long abs_ec; + struct ubi_wl_entry *move_from; + struct ubi_wl_entry *move_to; + int move_from_put; + int move_to_put; + struct list_head works; + int works_count; + struct task_struct *bgt_thread; + int thread_enabled; + char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; + + /* I/O unit's stuff */ + long long flash_size; + int peb_count; + int peb_size; + int bad_peb_count; + int good_peb_count; + int min_io_size; + int hdrs_min_io_size; + int ro_mode; + int leb_size; + int leb_start; + int ec_hdr_alsize; + int vid_hdr_alsize; + int vid_hdr_offset; + int vid_hdr_aloffset; + int vid_hdr_shift; + int bad_allowed; + struct mtd_info *mtd; +}; + +extern struct file_operations ubi_cdev_operations; +extern struct file_operations ubi_vol_cdev_operations; +extern struct class *ubi_class; + +/* vtbl.c */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec); +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); + +/* vmt.c */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); +int ubi_remove_volume(struct ubi_volume_desc *desc); +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_add_volume(struct ubi_device *ubi, int vol_id); +void ubi_free_volume(struct ubi_device *ubi, int vol_id); + +/* upd.c */ +int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes); +int ubi_more_update_data(struct ubi_device *ubi, int vol_id, + const void __user *buf, int count); + +/* misc.c */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); +int ubi_check_volume(struct ubi_device *ubi, int vol_id); +void ubi_calculate_reserved(struct ubi_device *ubi); + +/* gluebi.c */ +#ifdef CONFIG_MTD_UBI_GLUEBI +int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol); +int ubi_destroy_gluebi(struct ubi_volume *vol); +#else +#define ubi_create_gluebi(ubi, vol) 0 +#define ubi_destroy_gluebi(vol) 0 +#endif + +/* eba.c */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum); +int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, + int offset, int len, int check); +int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int offset, int len, int dtype); +int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype, + int used_ebs); +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype); +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr); +int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +void ubi_eba_close(const struct ubi_device *ubi); + +/* wl.c */ +int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture); +int ubi_wl_flush(struct ubi_device *ubi); +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); +int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +void ubi_wl_close(struct ubi_device *ubi); + +/* io.c */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len); +int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum, + int offset, 
int len); +int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture); +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose); +int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr); +int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose); +int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr); + +/* + * ubi_rb_for_each_entry - walk an RB-tree. + * @rb: a pointer to type 'struct rb_node' to to use as a loop counter + * @pos: a pointer to RB-tree entry type to use as a loop counter + * @root: RB-tree's root + * @member: the name of the 'struct rb_node' within the RB-tree entry + */ +#define ubi_rb_for_each_entry(rb, pos, root, member) \ + for (rb = rb_first(root), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \ + rb; \ + rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member)) + +/** + * ubi_zalloc_vid_hdr - allocate a volume identifier header object. + * @ubi: UBI device description object + * + * This function returns a pointer to the newly allocated and zero-filled + * volume identifier header object in case of success and %NULL in case of + * failure. + */ +static inline struct ubi_vid_hdr *ubi_zalloc_vid_hdr(const struct ubi_device *ubi) +{ + void *vid_hdr; + + vid_hdr = kzalloc(ubi->vid_hdr_alsize, GFP_KERNEL); + if (!vid_hdr) + return NULL; + + /* + * VID headers may be stored at un-aligned flash offsets, so we shift + * the pointer. + */ + return vid_hdr + ubi->vid_hdr_shift; +} + +/** + * ubi_free_vid_hdr - free a volume identifier header object. + * @ubi: UBI device description object + * @vid_hdr: the object to free + */ +static inline void ubi_free_vid_hdr(const struct ubi_device *ubi, + struct ubi_vid_hdr *vid_hdr) +{ + void *p = vid_hdr; + + if (!p) + return; + + kfree(p - ubi->vid_hdr_shift); +} + +/* + * This function is equivalent to 'ubi_io_read()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/* + * This function is equivalent to 'ubi_io_write()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_write_data(const struct ubi_device *ubi, const void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/** + * ubi_ro_mode - switch to read-only mode. + * @ubi: UBI device description object + */ +static inline void ubi_ro_mode(struct ubi_device *ubi) +{ + ubi->ro_mode = 1; + ubi_warn("switch to read-only mode"); +} + +/** + * vol_id2idx - get table index by volume ID. + * @ubi: UBI device description object + * @vol_id: volume ID + */ +static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id >= UBI_INTERNAL_VOL_START) + return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots; + else + return vol_id; +} + +/** + * idx2vol_id - get volume ID by table index. 
+ * @ubi: UBI device description object + * @idx: table index + */ +static inline int idx2vol_id(const struct ubi_device *ubi, int idx) +{ + if (idx >= ubi->vtbl_slots) + return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START; + else + return idx; +} + +#endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c new file mode 100644 index 00000000000..8925b977e3d --- /dev/null +++ b/drivers/mtd/ubi/upd.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + * + * Jan 2007: Alexander Schmidt, hacked per-volume update. + */ + +/* + * This file contains implementation of the volume update functionality. + * + * The update operation is based on the per-volume update marker which is + * stored in the volume table. The update marker is set before the update + * starts, and removed after the update has been finished. So if the update was + * interrupted by an unclean re-boot or due to some other reasons, the update + * marker stays on the flash media and UBI finds it when it attaches the MTD + * device next time. If the update marker is set for a volume, the volume is + * treated as damaged and most I/O operations are prohibited. Only a new update + * operation is allowed. + * + * Note, in general it is possible to implement the update operation as a + * transaction with a roll-back capability. + */ + +#include <linux/err.h> +#include <asm/uaccess.h> +#include <asm/div64.h> +#include "ubi.h" + +/** + * set_update_marker - set update marker. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function sets the update marker flag for volume @vol_id. Returns zero + * in case of success and a negative error code in case of failure. + */ +static int set_update_marker(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_vtbl_record vtbl_rec; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("set update marker for volume %d", vol_id); + + if (vol->upd_marker) { + ubi_assert(ubi->vtbl[vol_id].upd_marker); + dbg_msg("already set"); + return 0; + } + + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec.upd_marker = 1; + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + vol->upd_marker = 1; + return err; +} + +/** + * clear_update_marker - clear update marker. + * @ubi: UBI device description object + * @vol_id: volume ID + * @bytes: new data size in bytes + * + * This function clears the update marker for volume @vol_id, sets new volume + * data size and clears the "corrupted" flag (static volumes only). Returns + * zero in case of success and a negative error code in case of failure. 
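+ *
+ * The size arithmetic below relies on do_div() from <asm/div64.h>, which
+ * divides the 64-bit dividend in place and returns the remainder. A worked
+ * example with assumed numbers (a usable_leb_size of 126976 bytes and an
+ * update of 300000 bytes):
+ *
+ *	uint64_t tmp = 300000;
+ *	uint32_t rem = do_div(tmp, 126976);
+ *
+ * leaves tmp == 2 and rem == 46048, so @used_ebs becomes 3 (two full LEBs
+ * plus a partial one) and @last_eb_bytes becomes 46048.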
+ */ +static int clear_update_marker(struct ubi_device *ubi, int vol_id, long long bytes) +{ + int err; + uint64_t tmp; + struct ubi_vtbl_record vtbl_rec; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("clear update marker for volume %d", vol_id); + + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); + vtbl_rec.upd_marker = 0; + + if (vol->vol_type == UBI_STATIC_VOLUME) { + vol->corrupted = 0; + vol->used_bytes = tmp = bytes; + vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); + vol->used_ebs = tmp; + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + vol->upd_marker = 0; + return err; +} + +/** + * ubi_start_update - start volume update. + * @ubi: UBI device description object + * @vol_id: volume ID + * @bytes: update bytes + * + * This function starts volume update operation. If @bytes is zero, the volume + * is just wiped out. Returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes) +{ + int i, err; + uint64_t tmp; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("start update of volume %d, %llu bytes", vol_id, bytes); + vol->updating = 1; + + err = set_update_marker(ubi, vol_id); + if (err) + return err; + + /* Before updating - wipe out the volume */ + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, i); + if (err) + return err; + } + + if (bytes == 0) { + err = clear_update_marker(ubi, vol_id, 0); + if (err) + return err; + err = ubi_wl_flush(ubi); + if (!err) + vol->updating = 0; + } + + vol->upd_buf = kmalloc(ubi->leb_size, GFP_KERNEL); + if (!vol->upd_buf) + return -ENOMEM; + + tmp = bytes; + vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); + vol->upd_ebs += tmp; + vol->upd_bytes = bytes; + vol->upd_received = 0; + return 0; +} + +/** + * write_leb - write update data. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data to write + * @len: data size + * @used_ebs: how many logical eraseblocks will this volume contain (static + * volumes only) + * + * This function writes update data to corresponding logical eraseblock. In + * case of dynamic volume, this function checks if the data contains 0xFF bytes + * at the end. If yes, the 0xFF bytes are cut and not written. So if the whole + * buffer contains only 0xFF bytes, the LEB is left unmapped. + * + * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is + * that we want to make sure that more data may be appended to the logical + * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and + * this PEB won't be writable anymore. So if one writes the file-system image + * to the UBI volume where 0xFFs mean free space - UBI makes sure this free + * space is writable after the update. + * + * We do not do this for static volumes because they are read-only. But this + * also cannot be done because we have to store per-LEB CRC and the correct + * data length. + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
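+ *
+ * The trimming is done by 'ubi_calc_data_len()' (declared in ubi.h, defined
+ * in misc.c, not shown in this hunk). A minimal sketch of the idea, assuming
+ * @l is already aligned to @ubi->min_io_size:
+ *
+ *	while (l > 0 && ((const uint8_t *)buf)[l - 1] == 0xFF)
+ *		l -= 1;
+ *	l = ALIGN(l, ubi->min_io_size);
+ *
+ * i.e. trailing 0xFF bytes are dropped, but the result is padded back up so
+ * that the write stays aligned to the minimal I/O unit.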
+ */ +static int write_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, + int len, int used_ebs) +{ + int err, l; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + l = ALIGN(len, ubi->min_io_size); + memset(buf + len, 0xFF, l - len); + + l = ubi_calc_data_len(ubi, buf, l); + if (l == 0) { + dbg_msg("all %d bytes contain 0xFF - skip", len); + return 0; + } + if (len != l) + dbg_msg("skip last %d bytes (0xFF)", len - l); + + err = ubi_eba_write_leb(ubi, vol_id, lnum, buf, 0, l, + UBI_UNKNOWN); + } else { + /* + * When writing static volume, and this is the last logical + * eraseblock, the length (@len) does not have to be aligned to + * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()' + * function accepts exact (unaligned) length and stores it in + * the VID header. And it takes care of proper alignment by + * padding the buffer. Here we just make sure the padding will + * contain zeros, not random trash. + */ + memset(buf + len, 0, vol->usable_leb_size - len); + err = ubi_eba_write_leb_st(ubi, vol_id, lnum, buf, len, + UBI_UNKNOWN, used_ebs); + } + + return err; +} + +/** + * ubi_more_update_data - write more update data. + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function writes more data to the volume which is being updated. It may + * be called arbitrary number of times until all of the update data arrive. + * This function returns %0 in case of success, number of bytes written during + * the last call if the whole volume update was successfully finished, and a + * negative error code in case of failure. + */ +int ubi_more_update_data(struct ubi_device *ubi, int vol_id, + const void __user *buf, int count) +{ + uint64_t tmp; + struct ubi_volume *vol = ubi->volumes[vol_id]; + int lnum, offs, err = 0, len, to_write = count; + + dbg_msg("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + + tmp = vol->upd_received; + offs = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + if (vol->upd_received + count > vol->upd_bytes) + to_write = count = vol->upd_bytes - vol->upd_received; + + /* + * When updating volumes, we accumulate whole logical eraseblock of + * data and write it at once. + */ + if (offs != 0) { + /* + * This is a write to the middle of the logical eraseblock. We + * copy the data to our update buffer and wait for more data or + * flush it if the whole eraseblock is written or the update + * is finished. + */ + + len = vol->usable_leb_size - offs; + if (len > count) + len = count; + + err = copy_from_user(vol->upd_buf + offs, buf, len); + if (err) + return -EFAULT; + + if (offs + len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + int flush_len = offs + len; + + /* + * OK, we gathered either the whole eraseblock or this + * is the last chunk, it's time to flush the buffer. + */ + ubi_assert(flush_len <= vol->usable_leb_size); + err = write_leb(ubi, vol_id, lnum, vol->upd_buf, + flush_len, vol->upd_ebs); + if (err) + return err; + } + + vol->upd_received += len; + count -= len; + buf += len; + lnum += 1; + } + + /* + * If we've got more to write, let's continue. At this point we know we + * are starting from the beginning of an eraseblock. 
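+ *
+ * As a worked example of the position arithmetic at the top of this
+ * function (numbers assumed): with usable_leb_size == 126976 and
+ * upd_received == 130000,
+ *
+ *	tmp = 130000;
+ *	offs = do_div(tmp, 126976);
+ *	lnum = tmp;
+ *
+ * gives offs == 3024 and lnum == 1, i.e. the next chunk continues at byte
+ * offset 3024 of LEB 1.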
+ */ + while (count) { + if (count > vol->usable_leb_size) + len = vol->usable_leb_size; + else + len = count; + + err = copy_from_user(vol->upd_buf, buf, len); + if (err) + return -EFAULT; + + if (len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + err = write_leb(ubi, vol_id, lnum, vol->upd_buf, len, + vol->upd_ebs); + if (err) + break; + } + + vol->upd_received += len; + count -= len; + lnum += 1; + buf += len; + } + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + /* The update is finished, clear the update marker */ + err = clear_update_marker(ubi, vol_id, vol->upd_bytes); + if (err) + return err; + err = ubi_wl_flush(ubi); + if (err == 0) { + err = to_write; + kfree(vol->upd_buf); + vol->updating = 0; + } + } + + return err; +} diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c new file mode 100644 index 00000000000..622d0d18952 --- /dev/null +++ b/drivers/mtd/ubi/vmt.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains implementation of volume creation, deletion, updating and + * resizing. + */ + +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_check_volumes(struct ubi_device *ubi); +#else +#define paranoid_check_volumes(ubi) +#endif + +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */ +static struct device_attribute vol_reserved_ebs = + __ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_type = + __ATTR(type, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_name = + __ATTR(name, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_corrupted = + __ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_alignment = + __ATTR(alignment, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_usable_eb_size = + __ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_data_bytes = + __ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_upd_marker = + __ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL); + +/* + * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'. + * + * Consider a situation: + * A. process 1 opens a sysfs file related to volume Y, say + * /<sysfs>/class/ubi/ubiX_Y/reserved_ebs; + * B. process 2 removes volume Y; + * C. 
process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * + * What we want to do in a situation like that is to return error when the file + * is read. This is done by means of the 'removed' flag and the 'vol_lock' of + * the UBI volume description object. + */ +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + + spin_lock(&vol->ubi->volumes_lock); + if (vol->removed) { + spin_unlock(&vol->ubi->volumes_lock); + return -ENODEV; + } + if (attr == &vol_reserved_ebs) + ret = sprintf(buf, "%d\n", vol->reserved_pebs); + else if (attr == &vol_type) { + const char *tp; + tp = vol->vol_type == UBI_DYNAMIC_VOLUME ? "dynamic" : "static"; + ret = sprintf(buf, "%s\n", tp); + } else if (attr == &vol_name) + ret = sprintf(buf, "%s\n", vol->name); + else if (attr == &vol_corrupted) + ret = sprintf(buf, "%d\n", vol->corrupted); + else if (attr == &vol_alignment) + ret = sprintf(buf, "%d\n", vol->alignment); + else if (attr == &vol_usable_eb_size) { + ret = sprintf(buf, "%d\n", vol->usable_leb_size); + } else if (attr == &vol_data_bytes) + ret = sprintf(buf, "%lld\n", vol->used_bytes); + else if (attr == &vol_upd_marker) + ret = sprintf(buf, "%d\n", vol->upd_marker); + else + BUG(); + spin_unlock(&vol->ubi->volumes_lock); + return ret; +} + +/* Release method for volume devices */ +static void vol_release(struct device *dev) +{ + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + ubi_assert(vol->removed); + kfree(vol); +} + +/** + * volume_sysfs_init - initialize sysfs for new volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, this function does not free allocated resources in case of failure - + * the caller does it. This is because this would cause release() here and the + * caller would oops. + */ +static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + + err = device_create_file(&vol->dev, &vol_reserved_ebs); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_type); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_name); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_corrupted); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_alignment); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_usable_eb_size); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_data_bytes); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_upd_marker); + if (err) + return err; + return 0; +} + +/** + * volume_sysfs_close - close sysfs for a volume. + * @vol: volume description object + */ +static void volume_sysfs_close(struct ubi_volume *vol) +{ + device_remove_file(&vol->dev, &vol_upd_marker); + device_remove_file(&vol->dev, &vol_data_bytes); + device_remove_file(&vol->dev, &vol_usable_eb_size); + device_remove_file(&vol->dev, &vol_alignment); + device_remove_file(&vol->dev, &vol_corrupted); + device_remove_file(&vol->dev, &vol_name); + device_remove_file(&vol->dev, &vol_type); + device_remove_file(&vol->dev, &vol_reserved_ebs); + device_unregister(&vol->dev); +} + +/** + * ubi_create_volume - create volume. + * @ubi: UBI device description object + * @req: volume creation request + * + * This function creates volume described by @req. 
If @req->vol_id id + * %UBI_VOL_NUM_AUTO, this function automatically assigne ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) +{ + int i, err, vol_id = req->vol_id; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; + uint64_t bytes; + + if (ubi->ro_mode) + return -EROFS; + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + spin_lock(&ubi->volumes_lock); + + if (vol_id == UBI_VOL_NUM_AUTO) { + /* Find unused volume ID */ + dbg_msg("search for vacant volume ID"); + for (i = 0; i < ubi->vtbl_slots; i++) + if (!ubi->volumes[i]) { + vol_id = i; + break; + } + + if (vol_id == UBI_VOL_NUM_AUTO) { + dbg_err("out of volume IDs"); + err = -ENFILE; + goto out_unlock; + } + req->vol_id = vol_id; + } + + dbg_msg("volume ID %d, %llu bytes, type %d, name %s", + vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + + /* Ensure that this volume does not exist */ + err = -EEXIST; + if (ubi->volumes[vol_id]) { + dbg_err("volume %d already exists", vol_id); + goto out_unlock; + } + + /* Ensure that the name is unique */ + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i] && + ubi->volumes[i]->name_len == req->name_len && + strcmp(ubi->volumes[i]->name, req->name) == 0) { + dbg_err("volume \"%s\" exists (ID %d)", req->name, i); + goto out_unlock; + } + + /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; + bytes = req->bytes; + if (do_div(bytes, vol->usable_leb_size)) + vol->reserved_pebs = 1; + vol->reserved_pebs += bytes; + + /* Reserve physical eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_unlock; + } + ubi->avail_pebs -= vol->reserved_pebs; + ubi->rsvd_pebs += vol->reserved_pebs; + + vol->vol_id = vol_id; + vol->alignment = req->alignment; + vol->data_pad = ubi->leb_size % vol->alignment; + vol->vol_type = req->vol_type; + vol->name_len = req->name_len; + memcpy(vol->name, req->name, vol->name_len + 1); + vol->exclusive = 1; + vol->ubi = ubi; + ubi->volumes[vol_id] = vol; + spin_unlock(&ubi->volumes_lock); + + /* + * Finish all pending erases because there may be some LEBs belonging + * to the same volume ID. 
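+ *
+ * (As an aside, a worked example of the size calculation above, with an
+ * assumed geometry: leb_size == 129024 and req->alignment == 2048 give
+ * usable_leb_size == 129024; with req->bytes == 1000000, do_div() leaves
+ * 7 whole LEBs plus a non-zero remainder, so reserved_pebs == 1 + 7 == 8.)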
+ */ + err = ubi_wl_flush(ubi); + if (err) + goto out_acc; + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_acc; + } + + for (i = 0; i < vol->reserved_pebs; i++) + vol->eba_tbl[i] = UBI_LEB_UNMAPPED; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + } else { + bytes = vol->used_bytes; + vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); + vol->used_ebs = bytes; + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol_id + 1), 1); + if (err) { + ubi_err("cannot add character device for volume %d", vol_id); + goto out_mapping; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_gluebi; + + err = volume_sysfs_init(ubi, vol); + if (err) + goto out_sysfs; + + /* Fill volume table record */ + memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_ubi32(vol->alignment); + vtbl_rec.data_pad = cpu_to_ubi32(vol->data_pad); + vtbl_rec.name_len = cpu_to_ubi16(vol->name_len); + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + vtbl_rec.vol_type = UBI_VID_DYNAMIC; + else + vtbl_rec.vol_type = UBI_VID_STATIC; + memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_sysfs; + + spin_lock(&ubi->volumes_lock); + ubi->vol_count += 1; + vol->exclusive = 0; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + return 0; + +out_gluebi: + err = ubi_destroy_gluebi(vol); +out_cdev: + cdev_del(&vol->cdev); +out_mapping: + kfree(vol->eba_tbl); +out_acc: + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; +out_unlock: + spin_unlock(&ubi->volumes_lock); + kfree(vol); + return err; + + /* + * We are registered, so @vol is destroyed in the release function and + * we have to de-initialize differently. + */ +out_sysfs: + err = ubi_destroy_gluebi(vol); + cdev_del(&vol->cdev); + kfree(vol->eba_tbl); + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; + spin_unlock(&ubi->volumes_lock); + volume_sysfs_close(vol); + return err; +} + +/** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error + * code in case of failure. 
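+ *
+ * A hypothetical caller sketch, assuming the 'ubi_open_volume()' kernel API
+ * (implemented elsewhere in this patch set, not in this hunk):
+ *
+ *	desc = ubi_open_volume(ubi_num, vol_id, UBI_EXCLUSIVE);
+ *	if (IS_ERR(desc))
+ *		return PTR_ERR(desc);
+ *	err = ubi_remove_volume(desc);
+ *
+ * Note that on success this function frees @desc, so the caller must not
+ * use the descriptor afterwards.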
+ */ +int ubi_remove_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + + dbg_msg("remove UBI volume %d", vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (ubi->ro_mode) + return -EROFS; + + err = ubi_destroy_gluebi(vol); + if (err) + return err; + + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + return err; + + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, i); + if (err) + return err; + } + + spin_lock(&ubi->volumes_lock); + vol->removed = 1; + ubi->volumes[vol_id] = NULL; + spin_unlock(&ubi->volumes_lock); + + kfree(vol->eba_tbl); + vol->eba_tbl = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + kfree(desc); + + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= reserved_pebs; + ubi->avail_pebs += reserved_pebs; + i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (i > 0) { + i = ubi->avail_pebs >= i ? i : ubi->avail_pebs; + ubi->avail_pebs -= i; + ubi->rsvd_pebs += i; + ubi->beb_rsvd_pebs += i; + if (i > 0) + ubi_msg("reserve more %d PEBs", i); + } + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + module_put(THIS_MODULE); + return 0; +} + +/** + * ubi_resize_volume - re-size volume. + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * + * This function returns zero in case of success, and a negative error code in + * case of failure. + */ +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) +{ + int i, err, pebs, *new_mapping; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + struct ubi_vtbl_record vtbl_rec; + int vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + dbg_msg("re-size volume %d to from %d to %d PEBs", + vol_id, vol->reserved_pebs, reserved_pebs); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { + dbg_err("too small size %d, %d LEBs contain data", + reserved_pebs, vol->used_ebs); + return -EINVAL; + } + + /* If the size is the same, we have nothing to do */ + if (reserved_pebs == vol->reserved_pebs) + return 0; + + new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL); + if (!new_mapping) + return -ENOMEM; + + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = UBI_LEB_UNMAPPED; + + /* Reserve physical eraseblocks */ + pebs = reserved_pebs - vol->reserved_pebs; + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + if (pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs: requested %d, available %d", + pebs, ubi->avail_pebs); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + for (i = 0; i < vol->reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + /* Change volume table record */ + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + + if (pebs < 0) { + for (i = 0; i < -pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, reserved_pebs + i); + if (err) + goto out_acc; + } + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs += pebs; + 
ubi->avail_pebs -= pebs; + pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (pebs > 0) { + pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs; + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + ubi->beb_rsvd_pebs += pebs; + if (pebs > 0) + ubi_msg("reserve more %d PEBs", pebs); + } + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + vol->reserved_pebs = reserved_pebs; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + } + + paranoid_check_volumes(ubi); + return 0; + +out_acc: + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= pebs; + ubi->avail_pebs += pebs; + spin_unlock(&ubi->volumes_lock); + } +out_free: + kfree(new_mapping); + return err; +} + +/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function adds an existin volume and initializes all its data + * structures. Returnes zero in case of success and a negative error code in + * case of failure. + */ +int ubi_add_volume(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("add volume %d", vol_id); + ubi_dbg_dump_vol_info(vol); + ubi_assert(vol); + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol->vol_id + 1), 1); + if (err) { + ubi_err("cannot add character device for volume %d", vol_id); + return err; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_gluebi; + + err = volume_sysfs_init(ubi, vol); + if (err) { + cdev_del(&vol->cdev); + err = ubi_destroy_gluebi(vol); + volume_sysfs_close(vol); + return err; + } + + paranoid_check_volumes(ubi); + return 0; + +out_gluebi: + err = ubi_destroy_gluebi(vol); +out_cdev: + cdev_del(&vol->cdev); + return err; +} + +/** + * ubi_free_volume - free volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function frees all resources for volume @vol_id but does not remove it. + * Used only when the UBI device is detached. + */ +void ubi_free_volume(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("free volume %d", vol_id); + ubi_assert(vol); + + vol->removed = 1; + err = ubi_destroy_gluebi(vol); + ubi->volumes[vol_id] = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_volume - check volume information. 
+ * @ubi: UBI device description object + * @vol_id: volume ID + */ +static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) +{ + int idx = vol_id2idx(ubi, vol_id); + int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; + const struct ubi_volume *vol = ubi->volumes[idx]; + long long n; + const char *name; + + reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + + if (!vol) { + if (reserved_pebs) { + ubi_err("no volume info, but volume exists"); + goto fail; + } + return; + } + + if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || + vol->name_len < 0) { + ubi_err("negative values"); + goto fail; + } + if (vol->alignment > ubi->leb_size || vol->alignment == 0) { + ubi_err("bad alignment"); + goto fail; + } + + n = vol->alignment % ubi->min_io_size; + if (vol->alignment != 1 && n) { + ubi_err("alignment is not multiple of min I/O unit"); + goto fail; + } + + n = ubi->leb_size % vol->alignment; + if (vol->data_pad != n) { + ubi_err("bad data_pad, has to be %lld", n); + goto fail; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME && + vol->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto fail; + } + + if (vol->upd_marker != 0 && vol->upd_marker != 1) { + ubi_err("bad upd_marker"); + goto fail; + } + + if (vol->upd_marker && vol->corrupted) { + dbg_err("update marker and corrupted simultaneously"); + goto fail; + } + + if (vol->reserved_pebs > ubi->good_peb_count) { + ubi_err("too large reserved_pebs"); + goto fail; + } + + n = ubi->leb_size - vol->data_pad; + if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) { + ubi_err("bad usable_leb_size, has to be %lld", n); + goto fail; + } + + if (vol->name_len > UBI_VOL_NAME_MAX) { + ubi_err("too long volume name, max is %d", UBI_VOL_NAME_MAX); + goto fail; + } + + if (!vol->name) { + ubi_err("NULL volume name"); + goto fail; + } + + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err("bad name_len %lld", n); + goto fail; + } + + n = vol->used_ebs * vol->usable_leb_size; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + if (vol->corrupted != 0) { + ubi_err("corrupted dynamic volume"); + goto fail; + } + if (vol->used_ebs != vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes != vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes != n) { + ubi_err("bad used_bytes"); + goto fail; + } + } else { + if (vol->corrupted != 0 && vol->corrupted != 1) { + ubi_err("bad corrupted"); + goto fail; + } + if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes < 0 || + vol->last_eb_bytes > vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes < 0 || vol->used_bytes > n || + vol->used_bytes < n - vol->usable_leb_size) { + ubi_err("bad used_bytes"); + goto fail; + } + } + + alignment = ubi32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = ubi16_to_cpu(ubi->vtbl[vol_id].name_len); + upd_marker = ubi->vtbl[vol_id].upd_marker; + name = &ubi->vtbl[vol_id].name[0]; + if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) + vol_type = UBI_DYNAMIC_VOLUME; + else + vol_type = UBI_STATIC_VOLUME; + + if (alignment != vol->alignment || data_pad != vol->data_pad || + upd_marker != vol->upd_marker || vol_type != vol->vol_type || + name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { + ubi_err("volume 
info is different"); + goto fail; + } + + return; + +fail: + ubi_err("paranoid check failed"); + ubi_dbg_dump_vol_info(vol); + ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + BUG(); +} + +/** + * paranoid_check_volumes - check information about all volumes. + * @ubi: UBI device description object + */ +static void paranoid_check_volumes(struct ubi_device *ubi) +{ + int i; + + mutex_lock(&ubi->vtbl_mutex); + spin_lock(&ubi->volumes_lock); + for (i = 0; i < ubi->vtbl_slots; i++) + paranoid_check_volume(ubi, i); + spin_unlock(&ubi->volumes_lock); + mutex_unlock(&ubi->vtbl_mutex); +} +#endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c new file mode 100644 index 00000000000..b6fd6bbd941 --- /dev/null +++ b/drivers/mtd/ubi/vtbl.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes volume table manipulation code. The volume table is an + * on-flash table containing volume meta-data like name, number of reserved + * physical eraseblocks, type, etc. The volume table is stored in the so-called + * "layout volume". + * + * The layout volume is an internal volume which is organized as follows. It + * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical + * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each + * other. This redundancy guarantees robustness to unclean reboots. The volume + * table is basically an array of volume table records. Each record contains + * full information about the volume and protected by a CRC checksum. + * + * The volume table is changed, it is first changed in RAM. Then LEB 0 is + * erased, and the updated volume table is written back to LEB 0. Then same for + * LEB 1. This scheme guarantees recoverability from unclean reboots. + * + * In this UBI implementation the on-flash volume table does not contain any + * information about how many data static volumes contain. This information may + * be found from the scanning data. + * + * But it would still be beneficial to store this information in the volume + * table. For example, suppose we have a static volume X, and all its physical + * eraseblocks became bad for some reasons. Suppose we are attaching the + * corresponding MTD device, the scanning has found no logical eraseblocks + * corresponding to the volume X. According to the volume table volume X does + * exist. So we don't know whether it is just empty or all its physical + * eraseblocks went bad. So we cannot alarm the user about this corruption. + * + * The volume table also stores so-called "update marker", which is used for + * volume updates. 
Before updating the volume, the update marker is set, and + * after the update operation is finished, the update marker is cleared. So if + * the update operation was interrupted (e.g. by an unclean reboot), the + * update marker is still there and we know that the volume's contents are + * damaged. + */ + +#include <linux/crc32.h> +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_vtbl_check(const struct ubi_device *ubi); +#else +#define paranoid_vtbl_check(ubi) +#endif + +/* Empty volume table record */ +static struct ubi_vtbl_record empty_vtbl_record; + +/** + * ubi_change_vtbl_record - change volume table record. + * @ubi: UBI device description object + * @idx: table index to change + * @vtbl_rec: new volume table record + * + * This function changes volume table record @idx. If @vtbl_rec is %NULL, an + * empty volume table record is written. The caller does not have to calculate + * the CRC of the record as it is done by this function. Returns zero in case + * of success and a negative error code in case of failure. + */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec) +{ + int i, err; + uint32_t crc; + + ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); + + if (!vtbl_rec) + vtbl_rec = &empty_vtbl_record; + else { + crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_ubi32(crc); + } + + dbg_msg("change record %d", idx); + ubi_dbg_dump_vtbl_record(vtbl_rec, idx); + + mutex_lock(&ubi->vtbl_mutex); + memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_unmap_leb(ubi, UBI_LAYOUT_VOL_ID, i); + if (err) { + mutex_unlock(&ubi->vtbl_mutex); + return err; + } + err = ubi_eba_write_leb(ubi, UBI_LAYOUT_VOL_ID, i, ubi->vtbl, 0, + ubi->vtbl_size, UBI_LONGTERM); + if (err) { + mutex_unlock(&ubi->vtbl_mutex); + return err; + } + } + + paranoid_vtbl_check(ubi); + mutex_unlock(&ubi->vtbl_mutex); + return ubi_wl_flush(ubi); +} + +/** + * vtbl_check - check if the volume table is not corrupted and contains + * sensible data. + * @ubi: UBI device description object + * @vtbl: volume table + * + * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, + * and %-EINVAL if it contains inconsistent data. 
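+ * + * Note, a CRC failure alone is recoverable - the second table copy may + * still be good - which is why it is reported with %1 rather than with an + * error code, while inconsistent record contents cannot be recovered from + * and yield %-EINVAL.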
+ */ +static int vtbl_check(const struct ubi_device *ubi, + const struct ubi_vtbl_record *vtbl) +{ + int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; + int upd_marker; + uint32_t crc; + const char *name; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); + alignment = ubi32_to_cpu(vtbl[i].alignment); + data_pad = ubi32_to_cpu(vtbl[i].data_pad); + upd_marker = vtbl[i].upd_marker; + vol_type = vtbl[i].vol_type; + name_len = ubi16_to_cpu(vtbl[i].name_len); + name = &vtbl[i].name[0]; + + crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); + if (ubi32_to_cpu(vtbl[i].crc) != crc) { + ubi_err("bad CRC at record %u: %#08x, not %#08x", + i, crc, ubi32_to_cpu(vtbl[i].crc)); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return 1; + } + + if (reserved_pebs == 0) { + if (memcmp(&vtbl[i], &empty_vtbl_record, + UBI_VTBL_RECORD_SIZE)) { + dbg_err("bad empty record"); + goto bad; + } + continue; + } + + if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || + name_len < 0) { + dbg_err("negative values"); + goto bad; + } + + if (alignment > ubi->leb_size || alignment == 0) { + dbg_err("bad alignment"); + goto bad; + } + + n = alignment % ubi->min_io_size; + if (alignment != 1 && n) { + dbg_err("alignment is not multiple of min I/O unit"); + goto bad; + } + + n = ubi->leb_size % alignment; + if (data_pad != n) { + dbg_err("bad data_pad, has to be %d", n); + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + dbg_err("bad vol_type"); + goto bad; + } + + if (upd_marker != 0 && upd_marker != 1) { + dbg_err("bad upd_marker"); + goto bad; + } + + if (reserved_pebs > ubi->good_peb_count) { + dbg_err("too large reserved_pebs, good PEBs %d", + ubi->good_peb_count); + goto bad; + } + + if (name_len > UBI_VOL_NAME_MAX) { + dbg_err("too long volume name, max %d", + UBI_VOL_NAME_MAX); + goto bad; + } + + if (name[0] == '\0') { + dbg_err("NULL volume name"); + goto bad; + } + + if (name_len != strnlen(name, name_len + 1)) { + dbg_err("bad name_len"); + goto bad; + } + } + + /* Checks that all names are unique */ + for (i = 0; i < ubi->vtbl_slots - 1; i++) { + for (n = i + 1; n < ubi->vtbl_slots; n++) { + int len1 = ubi16_to_cpu(vtbl[i].name_len); + int len2 = ubi16_to_cpu(vtbl[n].name_len); + + if (len1 > 0 && len1 == len2 && + !strncmp(vtbl[i].name, vtbl[n].name, len1)) { + ubi_err("volumes %d and %d have the same name" + " \"%s\"", i, n, vtbl[i].name); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + ubi_dbg_dump_vtbl_record(&vtbl[n], n); + return -EINVAL; + } + } + } + + return 0; + +bad: + ubi_err("volume table check failed, record %d", i); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return -EINVAL; +} + +/** + * create_vtbl - create a copy of volume table. + * @ubi: UBI device description object + * @si: scanning information + * @copy: number of the volume table copy + * @vtbl: contents of the volume table + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
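+ * + * Note, if writing the table to the picked physical eraseblock fails, that + * eraseblock is moved to the corrupted list and the write is re-tried with + * another free physical eraseblock - up to five attempts, see the + * 'write_error' path below.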
+ */ +static int create_vtbl(const struct ubi_device *ubi, struct ubi_scan_info *si, + int copy, void *vtbl) +{ + int err, tries = 0; + struct ubi_vid_hdr *vid_hdr; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *new_seb, *old_seb = NULL; + + ubi_msg("create volume table (copy #%d)", copy + 1); + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + /* + * If a logical eraseblock which is supposed to contain this volume + * table copy was found during scanning, it has to be wiped out. + */ + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + +retry: + new_seb = ubi_scan_get_free_peb(ubi, si); + if (IS_ERR(new_seb)) { + err = PTR_ERR(new_seb); + goto out_free; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = + vid_hdr->data_pad = cpu_to_ubi32(0); + vid_hdr->lnum = cpu_to_ubi32(copy); + vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum); + vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1 : 0); + + /* The EC header is already there, write the VID header */ + err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); + if (err) + goto write_error; + + /* Write the layout volume contents */ + err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size); + if (err) + goto write_error; + + /* + * And add it to the scanning information. Don't delete the old + * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. + */ + err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, + vid_hdr, 0); + kfree(new_seb); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + /* Maybe this physical eraseblock went bad, try to pick another one */ + if (++tries <= 5) { + err = ubi_scan_add_to_list(si, new_seb->pnum, new_seb->ec, + &si->corr); + kfree(new_seb); + if (!err) + goto retry; + } else + kfree(new_seb); +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; +} + +/** + * process_lvol - process the layout volume. + * @ubi: UBI device description object + * @si: scanning information + * @sv: layout volume scanning information + * + * This function is responsible for reading the layout volume, ensuring it is + * not corrupted, and recovering from corruptions if needed. Returns the volume + * table in case of success and an error pointer in case of failure. + */ +static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi, + struct ubi_scan_info *si, + struct ubi_scan_volume *sv) +{ + int err; + struct rb_node *rb; + struct ubi_scan_leb *seb; + struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; + int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; + + /* + * UBI goes through the following steps when it changes the layout + * volume: + * a. erase LEB 0; + * b. write new data to LEB 0; + * c. erase LEB 1; + * d. write new data to LEB 1. + * + * Before the change, both LEBs contain the same data. + * + * Due to unclean reboots, the contents of LEB 0 may be lost, but there + * should be LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is + * not. Similarly, LEB 1 may be lost, but there should be LEB 0. And + * finally, unclean reboots may result in a situation when neither LEB + * 0 nor LEB 1 is corrupted, but they are different. In this case, LEB + * 0 contains more recent information. + * + * So the plan is to first check LEB 0. Then + * a. 
if LEB 0 is OK, it must contain the most recent data; then + * we compare it with LEB 1, and if they are different, we copy LEB + * 0 to LEB 1; + * b. if LEB 0 is corrupted, LEB 1 has to be OK; then we copy LEB 1 + * to LEB 0. + */ + + dbg_msg("check layout volume"); + + /* Read both LEB 0 and LEB 1 into memory */ + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { + leb[seb->lnum] = kzalloc(ubi->vtbl_size, GFP_KERNEL); + if (!leb[seb->lnum]) { + err = -ENOMEM; + goto out_free; + } + + err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + ubi->vtbl_size); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* Scrub the PEB later */ + seb->scrub = 1; + else if (err) + goto out_free; + } + + err = -EINVAL; + if (leb[0]) { + leb_corrupted[0] = vtbl_check(ubi, leb[0]); + if (leb_corrupted[0] < 0) + goto out_free; + } + + if (!leb_corrupted[0]) { + /* LEB 0 is OK */ + if (leb[1]) + leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); + if (leb_corrupted[1]) { + ubi_warn("volume table copy #2 is corrupted"); + err = create_vtbl(ubi, si, 1, leb[0]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + } + + /* Both LEB 0 and LEB 1 are OK and consistent */ + kfree(leb[1]); + return leb[0]; + } else { + /* LEB 0 is corrupted or does not exist */ + if (leb[1]) { + leb_corrupted[1] = vtbl_check(ubi, leb[1]); + if (leb_corrupted[1] < 0) + goto out_free; + } + if (leb_corrupted[1]) { + /* Both LEB 0 and LEB 1 are corrupted */ + ubi_err("both volume tables are corrupted"); + goto out_free; + } + + ubi_warn("volume table copy #1 is corrupted"); + err = create_vtbl(ubi, si, 0, leb[1]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + + kfree(leb[0]); + return leb[1]; + } + +out_free: + kfree(leb[0]); + kfree(leb[1]); + return ERR_PTR(err); +} + +/** + * create_empty_lvol - create empty layout volume. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns volume table contents in case of success and an + * error pointer in case of failure. + */ +static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int i; + struct ubi_vtbl_record *vtbl; + + vtbl = kzalloc(ubi->vtbl_size, GFP_KERNEL); + if (!vtbl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < ubi->vtbl_slots; i++) + memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); + + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + int err; + + err = create_vtbl(ubi, si, i, vtbl); + if (err) { + kfree(vtbl); + return ERR_PTR(err); + } + } + + return vtbl; +} + +/** + * init_volumes - initialize volume information for existing volumes. + * @ubi: UBI device description object + * @si: scanning information + * @vtbl: volume table + * + * This function allocates volume description objects for existing volumes. + * Returns zero in case of success and a negative error code in case of + * failure. 
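+ * + * Note, apart from the user volumes listed in @vtbl, this function also + * initializes the in-RAM description of the internal layout volume itself.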
+ */ +static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, + const struct ubi_vtbl_record *vtbl) +{ + int i, reserved_pebs = 0; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0) + continue; /* Empty record */ + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = ubi32_to_cpu(vtbl[i].alignment); + vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad); + vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + vol->name_len = ubi16_to_cpu(vtbl[i].name_len); + vol->usable_leb_size = ubi->leb_size - vol->data_pad; + memcpy(vol->name, vtbl[i].name, vol->name_len); + vol->name[vol->name_len] = '\0'; + vol->vol_id = i; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[i] = vol; + ubi->vol_count += 1; + vol->ubi = ubi; + reserved_pebs += vol->reserved_pebs; + + /* + * In case of a dynamic volume, UBI knows nothing about how + * much data is stored there. So assume the whole volume is + * used. + */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + continue; + } + + /* Static volumes only */ + sv = ubi_scan_find_sv(si, i); + if (!sv) { + /* + * No eraseblocks belonging to this volume were found. + * We don't actually know whether this static volume is + * completely corrupted or just contains no data. And + * we cannot know this as long as the data size is not + * stored on flash. So we just assume the volume is + * empty. FIXME: this should be handled. + */ + continue; + } + + if (sv->leb_count != sv->used_ebs) { + /* + * We found a static volume which misses several + * eraseblocks. Treat it as corrupted. + */ + ubi_warn("static volume %d misses %d LEBs - corrupted", + sv->vol_id, sv->used_ebs - sv->leb_count); + vol->corrupted = 1; + continue; + } + + vol->used_ebs = sv->used_ebs; + vol->used_bytes = (vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes += sv->last_data_size; + vol->last_eb_bytes = sv->last_data_size; + } + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; + vol->alignment = 1; + vol->vol_type = UBI_DYNAMIC_VOLUME; + vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; + memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); + vol->usable_leb_size = ubi->leb_size; + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->reserved_pebs; + vol->used_bytes = vol->used_ebs * (ubi->leb_size - vol->data_pad); + vol->vol_id = UBI_LAYOUT_VOL_ID; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; + reserved_pebs += vol->reserved_pebs; + ubi->vol_count += 1; + vol->ubi = ubi; + + if (reserved_pebs > ubi->avail_pebs) + ubi_err("not enough PEBs, required %d, available %d", + reserved_pebs, ubi->avail_pebs); + ubi->rsvd_pebs += reserved_pebs; + ubi->avail_pebs -= reserved_pebs; + + return 0; +} + +/** + * check_sv - check volume scanning information. + * @vol: UBI volume description object + * @sv: volume scanning information + * + * This function returns zero if the volume scanning information is consistent + * with the data read from the volume table, and %-EINVAL if not. 
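+ * + * (This is a helper for check_scanning_info() below, which invokes it for + * every volume found by scanning.)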
+ */ +static int check_sv(const struct ubi_volume *vol, + const struct ubi_scan_volume *sv) +{ + if (sv->highest_lnum >= vol->reserved_pebs) { + dbg_err("bad highest_lnum"); + goto bad; + } + if (sv->leb_count > vol->reserved_pebs) { + dbg_err("bad leb_count"); + goto bad; + } + if (sv->vol_type != vol->vol_type) { + dbg_err("bad vol_type"); + goto bad; + } + if (sv->used_ebs > vol->reserved_pebs) { + dbg_err("bad used_ebs"); + goto bad; + } + if (sv->data_pad != vol->data_pad) { + dbg_err("bad data_pad"); + goto bad; + } + return 0; + +bad: + ubi_err("bad scanning information"); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vol_info(vol); + return -EINVAL; +} + +/** + * check_scanning_info - check the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * Even though we protect on-flash data by CRC checksums, we still don't trust + * the media. This function ensures that the scanning information is consistent + * with the information read from the volume table. Returns zero if the + * scanning information is OK and %-EINVAL if it is not. + */ +static int check_scanning_info(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int err, i; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { + ubi_err("scanning found %d volumes, maximum is %d + %d", + si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); + return -EINVAL; + } + + if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT && + si->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("too large volume ID %d found by scanning", + si->highest_vol_id); + return -EINVAL; + } + + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + cond_resched(); + + sv = ubi_scan_find_sv(si, i); + vol = ubi->volumes[i]; + if (!vol) { + if (sv) + ubi_scan_rm_volume(si, sv); + continue; + } + + if (vol->reserved_pebs == 0) { + ubi_assert(i < ubi->vtbl_slots); + + if (!sv) + continue; + + /* + * During scanning we found a volume which does not + * exist according to the information in the volume + * table. This must have happened due to an unclean + * reboot while the volume was being removed. Discard + * these eraseblocks. + */ + ubi_msg("finish volume %d removal", sv->vol_id); + ubi_scan_rm_volume(si, sv); + } else if (sv) { + err = check_sv(vol, sv); + if (err) + return err; + } + } + + return 0; +} + +/** + * ubi_read_volume_table - read the volume table. + * @ubi: UBI device description object + * @si: scanning information + * + * This function reads the volume table, checks it, recovers from errors if + * needed, or creates it if needed. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int i, err; + struct ubi_scan_volume *sv; + + empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b); + + /* + * The number of supported volumes is limited by the eraseblock size + * and by the UBI_MAX_VOLUMES constant. + */ + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; + if (ubi->vtbl_slots > UBI_MAX_VOLUMES) + ubi->vtbl_slots = UBI_MAX_VOLUMES; + + ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; + ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); + + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); + if (!sv) { + /* + * No logical eraseblocks belonging to the layout volume were + * found. This could mean that the flash is just empty. In + * this case we create an empty layout volume. 
+ * + * But if the flash is not empty, this must be a corruption, + * or the MTD device just contains garbage. + */ + if (si->is_empty) { + ubi->vtbl = create_empty_lvol(ubi, si); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } else { + ubi_err("the layout volume was not found"); + return -EINVAL; + } + } else { + if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) { + /* This must not happen with proper UBI images */ + dbg_err("too many LEBs (%d) in layout volume", + sv->leb_count); + return -EINVAL; + } + + ubi->vtbl = process_lvol(ubi, si, sv); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } + + ubi->avail_pebs = ubi->good_peb_count; + + /* + * The layout volume is OK, initialize the corresponding in-RAM data + * structures. + */ + err = init_volumes(ubi, si, ubi->vtbl); + if (err) + goto out_free; + + /* + * Make sure that the scanning information is consistent with the + * information stored in the volume table. + */ + err = check_scanning_info(ubi, si); + if (err) + goto out_free; + + return 0; + +out_free: + kfree(ubi->vtbl); + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) + if (ubi->volumes[i]) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } + return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_vtbl_check - check volume table. + * @ubi: UBI device description object + */ +static void paranoid_vtbl_check(const struct ubi_device *ubi) +{ + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err("paranoid check failed"); + BUG(); + } +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c new file mode 100644 index 00000000000..9ecaf77eca9 --- /dev/null +++ b/drivers/mtd/ubi/wl.c @@ -0,0 +1,1671 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner + */ + +/* + * UBI wear-leveling unit. + * + * This unit is responsible for wear-leveling. It works in terms of physical + * eraseblocks and erase counters and knows nothing about logical eraseblocks, + * volumes, etc. From this unit's perspective all physical eraseblocks are of + * two types - used and free. Used physical eraseblocks are those that were + * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are + * those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only the erase + * counter header. The rest of the physical eraseblock contains only 0xFF + * bytes. + * + * When physical eraseblocks are returned to the WL unit by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in the context of the per-UBI device background thread, + * which is also managed by the WL unit. 
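+ * + * For example, the EBA unit "gets" a PEB when it maps a logical eraseblock + * to it, and "puts" the PEB back when the logical eraseblock is unmapped or + * re-mapped; the PEB is then erased in the background and eventually becomes + * free again.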
+ * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks + * with high erase counter. + * + * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick + * an "optimal" physical eraseblock. For example, when it is known that the + * physical eraseblock will be "put" soon because it contains short-term data, + * the WL unit may pick a free physical eraseblock with low erase counter, and + * so forth. + * + * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * + * This unit is also responsible for scrubbing. If a bit-flip is detected in a + * physical eraseblock, it has to be moved. Technically this is the same as + * moving it for wear-leveling reasons. + * + * As it was said, for the WL unit all physical eraseblocks are either "free" + * or "used". Free eraseblocks are kept in the @wl->free RB-tree, while used + * eraseblocks are kept in a set of different RB-trees: @wl->used, + * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * + * Note, in this implementation, we keep a small in-RAM object for each physical + * eraseblock. This is surely not a scalable solution. But it appears to be good + * enough for moderately large flashes and it is simple. In the future, one may + * re-work this unit and make it more scalable. + * + * At the moment this unit does not utilize the sequence number, which was + * introduced relatively recently. But it would be wise to do this because the + * sequence number of a logical eraseblock characterizes how old it is. For + * example, when we move a PEB with low erase counter, and we need to pick the + * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we + * pick a target PEB with an average EC if our PEB is not very "old". This is + * room for future re-work of the WL unit. + * + * FIXME: looks too complex, should be simplified (later). + */ + +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include "ubi.h" + +/* Number of physical eraseblocks reserved for wear-leveling purposes */ +#define WL_RESERVED_PEBS 1 + +/* + * For how many erase cycles short term, unknown, and long term physical + * eraseblocks are protected. + */ +#define ST_PROTECTION 16 +#define U_PROTECTION 10 +#define LT_PROTECTION 4 + +/* + * Maximum difference between two erase counters. If this threshold is + * exceeded, the WL unit starts moving data from used physical eraseblocks with + * low erase counter to free physical eraseblocks with high erase counter. + */ +#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD + +/* + * When a physical eraseblock is moved, the WL unit has to pick the target + * physical eraseblock to move to. The simplest way would be just to pick the + * one with the highest erase counter. But in certain workloads this could lead + * to an unlimited wear of one or a few physical eraseblocks. Indeed, imagine a + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase + * counter of the free physical eraseblock to pick. Namely, the WL unit does + * not pick eraseblocks with erase counter greater than the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ +#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) + +/* + * Maximum number of consecutive background thread failures which is enough to + * switch to read-only mode. 
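+ * The failure counter is maintained in ubi_thread() and is reset after any + * successful work, so only consecutive failures count.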
+ */ +#define WL_MAX_FAILURES 32 + +/** + * struct ubi_wl_entry - wear-leveling entry. + * @rb: link in the corresponding RB-tree + * @ec: erase counter + * @pnum: physical eraseblock number + * + * Each physical eraseblock has a corresponding &struct ubi_wl_entry object + * which may be kept in different RB-trees. + */ +struct ubi_wl_entry { + struct rb_node rb; + int ec; + int pnum; +}; + +/** + * struct ubi_wl_prot_entry - PEB protection entry. + * @rb_pnum: link in the @wl->prot.pnum RB-tree + * @rb_aec: link in the @wl->prot.aec RB-tree + * @abs_ec: the absolute erase counter value when the protection ends + * @e: the wear-leveling entry of the physical eraseblock under protection + * + * When the WL unit returns a physical eraseblock, the physical eraseblock is + * protected from being moved for some "time". For this reason, the physical + * eraseblock is not directly moved from the @wl->free tree to the @wl->used + * tree. There is one more tree in between where this physical eraseblock is + * temporarily stored (@wl->prot). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait; this is + * especially important in case of "short term" physical eraseblocks. + * + * Physical eraseblocks stay protected only for a limited time. But the "time" + * is measured in erase cycles in this case. This is implemented with help of + * the absolute erase counter (@wl->abs_ec). When it reaches a certain value, + * the physical eraseblocks are moved from the protection trees (@wl->prot.*) + * to the @wl->used tree. + * + * Protected physical eraseblocks are searched by physical eraseblock number + * (when they are put) and by the absolute erase counter (to check if it is + * time to move them to the @wl->used tree). So there are actually 2 RB-trees + * storing the protected physical eraseblocks: @wl->prot.pnum and + * @wl->prot.aec. They are referred to as the "protection" trees. The + * first one is indexed by the physical eraseblock number. The second one is + * indexed by the absolute erase counter. Both trees store + * &struct ubi_wl_prot_entry objects. + * + * Each physical eraseblock has 2 main states: free and used. The former state + * corresponds to the @wl->free tree. The latter state is split up into several + * sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is temporarily prohibited (@wl->prot.pnum and + * @wl->prot.aec trees); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those trees. + */ +struct ubi_wl_prot_entry { + struct rb_node rb_pnum; + struct rb_node rb_aec; + unsigned long long abs_ec; + struct ubi_wl_entry *e; +}; + +/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function + * + * @e: physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * The @func pointer points to the worker function. If the @cancel argument is + * not zero, the worker has to free the resources and exit immediately. 
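A worker + * is invoked either from do_work() with @cancel equal to zero, or from + * cancel_pending() with @cancel equal to one when the unit shuts down.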
The + * worker has to return zero in case of success and a negative error code in + * case of failure. + */ +struct ubi_work { + struct list_head list; + int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel); + /* The below fields are only relevant to erasure works */ + struct ubi_wl_entry *e; + int torture; +}; + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec); +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root); +#else +#define paranoid_check_ec(ubi, pnum, ec) 0 +#define paranoid_check_in_wl_tree(e, root) +#endif + +/* Slab cache for wear-leveling entries */ +static struct kmem_cache *wl_entries_slab; + +/** + * tree_empty - a helper function to check if an RB-tree is empty. + * @root: the root of the tree + * + * This function returns non-zero if the RB-tree is empty and zero if not. + */ +static inline int tree_empty(struct rb_root *root) +{ + return root->rb_node == NULL; +} + +/** + * wl_tree_add - add a wear-leveling entry to a WL RB-tree. + * @e: the wear-leveling entry to add + * @root: the root of the tree + * + * Note, we use (erase counter, physical eraseblock number) pairs as keys in + * the @ubi->used and @ubi->free RB-trees. + */ +static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node **p, *parent = NULL; + + p = &root->rb_node; + while (*p) { + struct ubi_wl_entry *e1; + + parent = *p; + e1 = rb_entry(parent, struct ubi_wl_entry, rb); + + if (e->ec < e1->ec) + p = &(*p)->rb_left; + else if (e->ec > e1->ec) + p = &(*p)->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, root); +} + + +/* + * Helper functions to add and delete wear-leveling entries from different + * trees. + */ + +static void free_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->free); +} +static inline void used_tree_add(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->used); +} +static inline void scrub_tree_add(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->scrub); +} +static inline void free_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->free); + rb_erase(&e->rb, &ubi->free); +} +static inline void used_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->used); + rb_erase(&e->rb, &ubi->used); +} +static inline void scrub_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->scrub); + rb_erase(&e->rb, &ubi->scrub); +} + +/** + * do_work - do one pending work. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int do_work(struct ubi_device *ubi) +{ + int err; + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + + if (list_empty(&ubi->works)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + spin_unlock(&ubi->wl_lock); + + /* + * Call the worker function. Do not touch the work structure + * after this call as it will have been freed or reused by that + * time by the worker function. 
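+ * (Both erase_worker() and wear_leveling_worker() below free @wrk + * themselves.)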
+ */ + err = wrk->func(ubi, wrk, 0); + if (err) + ubi_err("work failed with error code %d", err); + + spin_lock(&ubi->wl_lock); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + return err; +} + +/** + * produce_free_peb - produce a free physical eraseblock. + * @ubi: UBI device description object + * + * This function tries to make a free PEB by means of synchronous execution of + * pending works. This may be needed if, for example, the background thread is + * disabled. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int produce_free_peb(struct ubi_device *ubi) +{ + int err; + + spin_lock(&ubi->wl_lock); + while (tree_empty(&ubi->free)) { + spin_unlock(&ubi->wl_lock); + + dbg_wl("do one work synchronously"); + err = do_work(ubi); + if (err) + return err; + + spin_lock(&ubi->wl_lock); + } + spin_unlock(&ubi->wl_lock); + + return 0; +} + +/** + * in_wl_tree - check if a wear-leveling entry is present in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns non-zero if @e is in the @root RB-tree and zero if it + * is not. + */ +static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node *p; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + + if (e->pnum == e1->pnum) { + ubi_assert(e == e1); + return 1; + } + + if (e->ec < e1->ec) + p = p->rb_left; + else if (e->ec > e1->ec) + p = p->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + } + + return 0; +} + +/** + * prot_tree_add - add physical eraseblock to protection trees. + * @ubi: UBI device description object + * @e: the physical eraseblock to add + * @pe: protection entry object to use + * @abs_ec: absolute erase counter value when this physical eraseblock has + * to be removed from the protection trees. + * + * @wl->lock has to be locked. + */ +static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, + struct ubi_wl_prot_entry *pe, int abs_ec) +{ + struct rb_node **p, *parent = NULL; + struct ubi_wl_prot_entry *pe1; + + pe->e = e; + pe->abs_ec = ubi->abs_ec + abs_ec; + + p = &ubi->prot.pnum.rb_node; + while (*p) { + parent = *p; + pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); + + if (e->pnum < pe1->e->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&pe->rb_pnum, parent, p); + rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); + + p = &ubi->prot.aec.rb_node; + parent = NULL; + while (*p) { + parent = *p; + pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); + + if (pe->abs_ec < pe1->abs_ec) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&pe->rb_aec, parent, p); + rb_insert_color(&pe->rb_aec, &ubi->prot.aec); +} + +/** + * find_wl_entry - find wear-leveling entry closest to a certain erase counter. + * @root: the RB-tree to search + * @max: highest possible erase counter + * + * This function looks for a wear-leveling entry with erase counter closest + * to, but less than, @max. 
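+ * Note, @max is relative: the lowest erase counter found in the tree is + * added to it before the search.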
+ */ +static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) +{ + struct rb_node *p; + struct ubi_wl_entry *e; + + e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); + max += e->ec; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + if (e1->ec >= max) + p = p->rb_left; + else { + p = p->rb_right; + e = e1; + } + } + + return e; +} + +/** + * ubi_wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * @dtype: type of data which will be stored in this physical eraseblock + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. Might sleep. + */ +int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) +{ + int err, protect, medium_ec; + struct ubi_wl_entry *e, *first, *last; + struct ubi_wl_prot_entry *pe; + + ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || + dtype == UBI_UNKNOWN); + + pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_KERNEL); + if (!pe) + return -ENOMEM; + +retry: + spin_lock(&ubi->wl_lock); + if (tree_empty(&ubi->free)) { + if (ubi->works_count == 0) { + ubi_assert(list_empty(&ubi->works)); + ubi_err("no free eraseblocks"); + spin_unlock(&ubi->wl_lock); + kfree(pe); + return -ENOSPC; + } + spin_unlock(&ubi->wl_lock); + + err = produce_free_peb(ubi); + if (err < 0) { + kfree(pe); + return err; + } + goto retry; + } + + switch (dtype) { + case UBI_LONGTERM: + /* + * For long term data we pick a physical eraseblock + * with high erase counter. But the highest erase + * counter we can pick is bounded by the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ + e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + protect = LT_PROTECTION; + break; + case UBI_UNKNOWN: + /* + * For unknown data we pick a physical eraseblock with + * medium erase counter. But by no means can we pick a + * physical eraseblock with erase counter greater than + * or equal to the lowest erase counter plus + * %WL_FREE_MAX_DIFF. + */ + first = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + last = rb_entry(rb_last(&ubi->free), + struct ubi_wl_entry, rb); + + if (last->ec - first->ec < WL_FREE_MAX_DIFF) + e = rb_entry(ubi->free.rb_node, + struct ubi_wl_entry, rb); + else { + medium_ec = (first->ec + WL_FREE_MAX_DIFF) / 2; + e = find_wl_entry(&ubi->free, medium_ec); + } + protect = U_PROTECTION; + break; + case UBI_SHORTTERM: + /* + * For short term data we pick a physical eraseblock + * with the lowest erase counter as we expect it will + * be erased soon. + */ + e = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + protect = ST_PROTECTION; + break; + default: + protect = 0; + e = NULL; + BUG(); + } + + /* + * Move the physical eraseblock to the protection trees where it will + * be protected from being moved for some time. 
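+ * The protection period is measured in erase operations: prot_tree_add() + * stamps the entry with the current absolute erase counter plus the + * protection value, and check_protection_over() releases the entry once + * @wl->abs_ec reaches that value.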
+ */ + free_tree_del(ubi, e); + prot_tree_add(ubi, e, pe, protect); + + dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); + spin_unlock(&ubi->wl_lock); + + return e->pnum; +} + +/** + * prot_tree_del - remove a physical eraseblock from the protection trees + * @ubi: UBI device description object + * @pnum: the physical eraseblock to remove + */ +static void prot_tree_del(struct ubi_device *ubi, int pnum) +{ + struct rb_node *p; + struct ubi_wl_prot_entry *pe = NULL; + + p = ubi->prot.pnum.rb_node; + while (p) { + pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); + + if (pnum == pe->e->pnum) + break; + + if (pnum < pe->e->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + + ubi_assert(pe->e->pnum == pnum); + rb_erase(&pe->rb_aec, &ubi->prot.aec); + rb_erase(&pe->rb_pnum, &ubi->prot.pnum); + kfree(pe); +} + +/** + * sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @e: the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + unsigned long long ec = e->ec; + + dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); + + err = paranoid_check_ec(ubi, e->pnum, e->ec); + if (err > 0) + return -EINVAL; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_sync_erase(ubi, e->pnum, torture); + if (err < 0) + goto out_free; + + ec += err; + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err("erase counter overflow at PEB %d, EC %llu", + e->pnum, ec); + err = -EINVAL; + goto out_free; + } + + dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); + + ec_hdr->ec = cpu_to_ubi64(ec); + + err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); + if (err) + goto out_free; + + e->ec = ec; + spin_lock(&ubi->wl_lock); + if (e->ec > ubi->max_ec) + ubi->max_ec = e->ec; + spin_unlock(&ubi->wl_lock); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * check_protection_over - check if it is time to stop protecting some + * physical eraseblocks. + * @ubi: UBI device description object + * + * This function is called after each erase operation, when the absolute erase + * counter is incremented, to check if some physical eraseblocks do not have + * to be protected any longer. These physical eraseblocks are moved from the + * protection trees to the used tree. + */ +static void check_protection_over(struct ubi_device *ubi) +{ + struct ubi_wl_prot_entry *pe; + + /* + * There may be several protected physical eraseblocks to remove, + * process them all. + */ + while (1) { + spin_lock(&ubi->wl_lock); + if (tree_empty(&ubi->prot.aec)) { + spin_unlock(&ubi->wl_lock); + break; + } + + pe = rb_entry(rb_first(&ubi->prot.aec), + struct ubi_wl_prot_entry, rb_aec); + + if (pe->abs_ec > ubi->abs_ec) { + spin_unlock(&ubi->wl_lock); + break; + } + + dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", + pe->e->pnum, ubi->abs_ec, pe->abs_ec); + rb_erase(&pe->rb_aec, &ubi->prot.aec); + rb_erase(&pe->rb_pnum, &ubi->prot.pnum); + used_tree_add(ubi, pe->e); + spin_unlock(&ubi->wl_lock); + + kfree(pe); + cond_resched(); + } +} + +/** + * schedule_ubi_work - schedule a work. 
+ * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function enqueues a work defined by @wrk to the tail of the pending + * works list. + */ +static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + spin_lock(&ubi->wl_lock); + list_add_tail(&wrk->list, &ubi->works); + ubi_assert(ubi->works_count >= 0); + ubi->works_count += 1; + if (ubi->thread_enabled) + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); +} + +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel); + +/** + * schedule_erase - schedule an erase work. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and %-ENOMEM in case of + * failure. + */ +static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) +{ + struct ubi_work *wl_wrk; + + dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", + e->pnum, e->ec, torture); + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->func = &erase_worker; + wl_wrk->e = e; + wl_wrk->torture = torture; + + schedule_ubi_work(ubi, wl_wrk); + return 0; +} + +/** + * wear_leveling_worker - wear-leveling worker function. + * @ubi: UBI device description object + * @wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function copies data from a less worn out physical eraseblock to a + * more worn out one. Returns zero in case of success and a negative error + * code in case of failure. + */ +static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + int cancel) +{ + int err, put = 0; + struct ubi_wl_entry *e1, *e2; + struct ubi_vid_hdr *vid_hdr; + + kfree(wrk); + + if (cancel) + return 0; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + spin_lock(&ubi->wl_lock); + + /* + * Only one WL worker at a time is supported in this implementation, so + * make sure a PEB is not being moved already. + */ + if (ubi->move_to || tree_empty(&ubi->free) || + (tree_empty(&ubi->used) && tree_empty(&ubi->scrub))) { + /* + * Only one WL worker at a time is supported in this + * implementation, so if a LEB is already being moved, cancel. + * + * No free physical eraseblocks? Well, we cancel wear-leveling + * then. It will be triggered again when a free physical + * eraseblock appears. + * + * No used physical eraseblocks? They must be temporarily + * protected from being moved. They will be moved to the + * @ubi->used tree later and the wear-leveling will be + * triggered again. + */ + dbg_wl("cancel WL, a list is empty: free %d, used %d", + tree_empty(&ubi->free), tree_empty(&ubi->used)); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + } + + if (tree_empty(&ubi->scrub)) { + /* + * Now pick the least worn-out used physical eraseblock and a + * highly worn-out free physical eraseblock. If the erase + * counters differ enough, start wear-leveling. 
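+ * ("Enough" means by at least %UBI_WL_THRESHOLD - the same check + * that ensure_wear_leveling() did when scheduling this work.)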
+ */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { + dbg_wl("no WL needed: min used EC %d, max free EC %d", + e1->ec, e2->ec); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + } + used_tree_del(ubi, e1); + dbg_wl("move PEB %d EC %d to PEB %d EC %d", + e1->pnum, e1->ec, e2->pnum, e2->ec); + } else { + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + scrub_tree_del(ubi, e1); + dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); + } + + free_tree_del(ubi, e2); + ubi_assert(!ubi->move_from && !ubi->move_to); + ubi_assert(!ubi->move_to_put && !ubi->move_from_put); + ubi->move_from = e1; + ubi->move_to = e2; + spin_unlock(&ubi->wl_lock); + + /* + * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. + * So far we do not know which logical eraseblock our physical + * eraseblock (@e1) belongs to. We have to read the volume identifier + * header first. + */ + + err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + if (err == UBI_IO_PEB_FREE) { + /* + * We are trying to move a PEB without a VID header. + * UBI always writes VID headers shortly after the PEB + * was given, so we have a situation when it did not + * have a chance to write it down because it was + * preempted. Just re-schedule the work, so that next + * time it will likely have the VID header in place. + */ + dbg_wl("PEB %d has no VID header", e1->pnum); + err = 0; + } else { + ubi_err("error %d while reading VID header from PEB %d", + err, e1->pnum); + if (err > 0) + err = -EIO; + } + goto error; + } + + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); + if (err) { + if (err == UBI_IO_BITFLIPS) + err = 0; + goto error; + } + + ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); + if (!ubi->move_to_put) + used_tree_add(ubi, e2); + else + put = 1; + ubi->move_from = ubi->move_to = NULL; + ubi->move_from_put = ubi->move_to_put = 0; + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + if (put) { + /* + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ + dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); + err = schedule_erase(ubi, e2, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e2); + ubi_ro_mode(ubi); + } + } + + err = schedule_erase(ubi, e1, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e1); + ubi_ro_mode(ubi); + } + + dbg_wl("done"); + return err; + + /* + * Some error occurred. @e1 was not changed, so put it back. @e2 might + * be changed, schedule it for erasure. + */ +error: + if (err) + dbg_wl("error %d occurred, cancel operation", err); + ubi_assert(err <= 0); + + ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; + if (ubi->move_from_put) + put = 1; + else + used_tree_add(ubi, e1); + ubi->move_from = ubi->move_to = NULL; + ubi->move_from_put = ubi->move_to_put = 0; + spin_unlock(&ubi->wl_lock); + + if (put) { + /* + * Well, the source PEB was put meanwhile, schedule it for + * erasure. 
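+ * (This mirrors the success path above, but here it is the + * source physical eraseblock @e1 which goes to erasure.)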
+ */ + dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); + err = schedule_erase(ubi, e1, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e1); + ubi_ro_mode(ubi); + } + } + + err = schedule_erase(ubi, e2, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e2); + ubi_ro_mode(ubi); + } + + yield(); + return err; +} + +/** + * ensure_wear_leveling - schedule wear-leveling if it is needed. + * @ubi: UBI device description object + * + * This function checks if it is time to start wear-leveling and schedules it + * if yes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +static int ensure_wear_leveling(struct ubi_device *ubi) +{ + int err = 0; + struct ubi_wl_entry *e1; + struct ubi_wl_entry *e2; + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + if (ubi->wl_scheduled) + /* Wear-leveling is already in the work queue */ + goto out_unlock; + + /* + * If the ubi->scrub tree is not empty, scrubbing is needed, and the + * the WL worker has to be scheduled anyway. + */ + if (tree_empty(&ubi->scrub)) { + if (tree_empty(&ubi->used) || tree_empty(&ubi->free)) + /* No physical eraseblocks - no deal */ + goto out_unlock; + + /* + * We schedule wear-leveling only if the difference between the + * lowest erase counter of used physical eraseblocks and a high + * erase counter of free physical eraseblocks is greater then + * %UBI_WL_THRESHOLD. + */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) + goto out_unlock; + dbg_wl("schedule wear-leveling"); + } else + dbg_wl("schedule scrubbing"); + + ubi->wl_scheduled = 1; + spin_unlock(&ubi->wl_lock); + + wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL); + if (!wrk) { + err = -ENOMEM; + goto out_cancel; + } + + wrk->func = &wear_leveling_worker; + schedule_ubi_work(ubi, wrk); + return err; + +out_cancel: + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; +out_unlock: + spin_unlock(&ubi->wl_lock); + return err; +} + +/** + * erase_worker - physical eraseblock erase worker function. + * @ubi: UBI device description object + * @wl_wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function erases a physical eraseblock and perform torture testing if + * needed. It also takes care about marking the physical eraseblock bad if + * needed. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel) +{ + int err; + struct ubi_wl_entry *e = wl_wrk->e; + int pnum = e->pnum; + + if (cancel) { + dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); + kfree(wl_wrk); + kmem_cache_free(wl_entries_slab, e); + return 0; + } + + dbg_wl("erase PEB %d EC %d", pnum, e->ec); + + err = sync_erase(ubi, e, wl_wrk->torture); + if (!err) { + /* Fine, we've erased it successfully */ + kfree(wl_wrk); + + spin_lock(&ubi->wl_lock); + ubi->abs_ec += 1; + free_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + + /* + * One more erase operation has happened, take care about protected + * physical eraseblocks. + */ + check_protection_over(ubi); + + /* And take care about wear-leveling */ + err = ensure_wear_leveling(ubi); + return err; + } + + kfree(wl_wrk); + kmem_cache_free(wl_entries_slab, e); + + if (err != -EIO) { + /* + * If this is not %-EIO, we have no idea what to do. 
Scheduling + * this physical eraseblock for erasure again would cause + * errors again and again. Well, let's switch to RO mode. + */ + ubi_ro_mode(ubi); + return err; + } + + /* It is %-EIO, the PEB went bad */ + + if (!ubi->bad_allowed) { + ubi_err("bad physical eraseblock %d detected", pnum); + ubi_ro_mode(ubi); + err = -EIO; + } else { + int need; + + spin_lock(&ubi->volumes_lock); + need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; + if (need > 0) { + need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + if (need > 0) + ubi_msg("reserve more %d PEBs", need); + } + + if (ubi->beb_rsvd_pebs == 0) { + spin_unlock(&ubi->volumes_lock); + ubi_err("no reserved physical eraseblocks"); + ubi_ro_mode(ubi); + return -EIO; + } + + spin_unlock(&ubi->volumes_lock); + ubi_msg("mark PEB %d as bad", pnum); + + err = ubi_io_mark_bad(ubi, pnum); + if (err) { + ubi_ro_mode(ubi); + return err; + } + + spin_lock(&ubi->volumes_lock); + ubi->beb_rsvd_pebs -= 1; + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); + if (ubi->beb_rsvd_pebs == 0) + ubi_warn("last PEB from the reserved pool was used"); + spin_unlock(&ubi->volumes_lock); + } + + return err; +} + +/** + * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling + * unit. + * @ubi: UBI device description object + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured + * + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred on this PEB and it has to be tested. This function returns zero + * in case of success and a negative error code in case of failure. + */ +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) +{ + int err; + struct ubi_wl_entry *e; + + dbg_wl("PEB %d", pnum); + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + + spin_lock(&ubi->wl_lock); + + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from) { + /* + * The user is putting the physical eraseblock which was + * selected to be moved. It will be scheduled for erasure in + * the wear-leveling worker. + */ + dbg_wl("PEB %d is being moved", pnum); + ubi_assert(!ubi->move_from_put); + ubi->move_from_put = 1; + spin_unlock(&ubi->wl_lock); + return 0; + } else if (e == ubi->move_to) { + /* + * The user is putting the physical eraseblock which was + * selected as the target to move the data to. It may happen if + * the EBA unit already re-mapped the LEB but the WL unit has + * not yet put the PEB to the "used" tree. + */ + dbg_wl("PEB %d is the target of data moving", pnum); + ubi_assert(!ubi->move_to_put); + ubi->move_to_put = 1; + spin_unlock(&ubi->wl_lock); + return 0; + } else { + if (in_wl_tree(e, &ubi->used)) + used_tree_del(ubi, e); + else if (in_wl_tree(e, &ubi->scrub)) + scrub_tree_del(ubi, e); + else + prot_tree_del(ubi, e->pnum); + } + spin_unlock(&ubi->wl_lock); + + err = schedule_erase(ubi, e, torture); + if (err) { + spin_lock(&ubi->wl_lock); + used_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + } + + return err; +} + +/** + * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to schedule + * + * If a bit-flip in a physical eraseblock is detected, this physical eraseblock + * needs scrubbing. 
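Bit-flips are typically reported + * by the I/O unit as %UBI_IO_BITFLIPS when an eraseblock is read.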
This function schedules a physical eraseblock for + * scrubbing, which is done in the background. This function returns zero in + * case of success and a negative error code in case of failure. + */ +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + ubi_msg("schedule PEB %d for scrubbing", pnum); + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + if (e == ubi->move_to) { + /* + * This physical eraseblock was used to move data to. The data + * was moved but the PEB was not yet inserted into the proper + * tree. We should just wait a little and let the WL worker + * proceed. + */ + spin_unlock(&ubi->wl_lock); + dbg_wl("the PEB %d is not in proper tree, retry", pnum); + yield(); + goto retry; + } + + if (in_wl_tree(e, &ubi->used)) + used_tree_del(ubi, e); + else + prot_tree_del(ubi, pnum); + + scrub_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + + /* + * Technically scrubbing is the same as wear-leveling, so it is done + * by the WL worker. + */ + return ensure_wear_leveling(ubi); +} + +/** + * ubi_wl_flush - flush all pending works. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_wl_flush(struct ubi_device *ubi) +{ + int err, pending_count; + + pending_count = ubi->works_count; + + dbg_wl("flush (%d pending works)", pending_count); + + /* + * Erase while the pending works queue is not empty, but not more than + * the number of currently pending works. + */ + while (pending_count-- > 0) { + err = do_work(ubi); + if (err) + return err; + } + + return 0; +} + +/** + * tree_destroy - destroy an RB-tree. + * @root: the root of the tree to destroy + */ +static void tree_destroy(struct rb_root *root) +{ + struct rb_node *rb; + struct ubi_wl_entry *e; + + rb = root->rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + e = rb_entry(rb, struct ubi_wl_entry, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &e->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(wl_entries_slab, e); + } + } +} + +/** + * ubi_thread - UBI background thread. + * @u: the UBI device description object pointer + */ +static int ubi_thread(void *u) +{ + int failures = 0; + struct ubi_device *ubi = u; + + ubi_msg("background thread \"%s\" started, PID %d", + ubi->bgt_name, current->pid); + + for (;;) { + int err; + + if (kthread_should_stop()) + goto out; + + if (try_to_freeze()) + continue; + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || + !ubi->thread_enabled) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); + continue; + } + spin_unlock(&ubi->wl_lock); + + err = do_work(ubi); + if (err) { + ubi_err("%s: work failed with error code %d", + ubi->bgt_name, err); + if (failures++ > WL_MAX_FAILURES) { + /* + * Too many failures, disable the thread and + * switch to read-only mode. + */ + ubi_msg("%s: %d consecutive failures", + ubi->bgt_name, WL_MAX_FAILURES); + ubi_ro_mode(ubi); + break; + } + } else + failures = 0; + + cond_resched(); + } + +out: + dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); + return 0; +} + +/** + * cancel_pending - cancel all pending works. 
+/** + * cancel_pending - cancel all pending works. + * @ubi: UBI device description object + */ +static void cancel_pending(struct ubi_device *ubi) +{ + while (!list_empty(&ubi->works)) { + struct ubi_work *wrk; + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + wrk->func(ubi, wrk, 1); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + } +} + +/** + * ubi_wl_init_scan - initialize the wear-leveling unit using scanning + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success, and a negative error code in + * case of failure. + */ +int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int err; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *tmp; + struct ubi_wl_entry *e; + + + ubi->used = ubi->free = ubi->scrub = RB_ROOT; + ubi->prot.pnum = ubi->prot.aec = RB_ROOT; + spin_lock_init(&ubi->wl_lock); + ubi->max_ec = si->max_ec; + INIT_LIST_HEAD(&ubi->works); + + sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + if (IS_ERR(ubi->bgt_thread)) { + err = PTR_ERR(ubi->bgt_thread); + ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, + err); + return err; + } + + if (ubi_devices_cnt == 0) { + wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", + sizeof(struct ubi_wl_entry), + 0, 0, NULL, NULL); + if (!wl_entries_slab) + return -ENOMEM; + } + + err = -ENOMEM; + ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); + if (!ubi->lookuptbl) + goto out_free; + + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(wl_entries_slab, e); + goto out_free; + } + } + + list_for_each_entry(seb, &si->free, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi_assert(e->ec >= 0); + free_tree_add(ubi, e); + ubi->lookuptbl[e->pnum] = e; + } + + list_for_each_entry(seb, &si->corr, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(wl_entries_slab, e); + goto out_free; + } + } + + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (!seb->scrub) { + dbg_wl("add PEB %d EC %d to the used tree", + e->pnum, e->ec); + used_tree_add(ubi, e); + } else { + dbg_wl("add PEB %d EC %d to the scrub tree", + e->pnum, e->ec); + scrub_tree_add(ubi, e); + } + } + } + + if (WL_RESERVED_PEBS > ubi->avail_pebs) { + ubi_err("not enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, WL_RESERVED_PEBS); + goto out_free; + } + ubi->avail_pebs -= WL_RESERVED_PEBS; + ubi->rsvd_pebs += WL_RESERVED_PEBS; + + /* Schedule wear-leveling if needed */ + err = ensure_wear_leveling(ubi); + if (err) + goto out_free; + + return 0; + +out_free: + cancel_pending(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + 
kfree(ubi->lookuptbl); + if (ubi_devices_cnt == 0) + kmem_cache_destroy(wl_entries_slab); + return err; +} + +/** + * protection_trees_destroy - destroy the protection RB-trees. + * @ubi: UBI device description object + */ +static void protection_trees_destroy(struct ubi_device *ubi) +{ + struct rb_node *rb; + struct ubi_wl_prot_entry *pe; + + rb = ubi->prot.aec.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &pe->rb_aec) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(wl_entries_slab, pe->e); + kfree(pe); + } + } +} + +/** + * ubi_wl_close - close the wear-leveling unit. + * @ubi: UBI device description object + */ +void ubi_wl_close(struct ubi_device *ubi) +{ + dbg_wl("disable \"%s\"", ubi->bgt_name); + if (ubi->bgt_thread) + kthread_stop(ubi->bgt_thread); + + dbg_wl("close the UBI wear-leveling unit"); + + cancel_pending(ubi); + protection_trees_destroy(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); + if (ubi_devices_cnt == 1) + kmem_cache_destroy(wl_entries_slab); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_ec - make sure that the erase counter of a physical eraseblock + * is correct. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum + * is equivalent to @ec, %1 if not, and a negative error code if an error + * occurred. + */ +static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec) +{ + int err; + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + /* The header does not have to exist */ + err = 0; + goto out_free; + } + + read_ec = ubi64_to_cpu(ec_hdr->ec); + if (ec != read_ec) { + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_err("read EC is %lld, should be %d", read_ec, ec); + ubi_dbg_dump_stack(); + err = 1; + } else + err = 0; + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present + * in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns zero if @e is in the @root RB-tree and %1 if it + * is not. + */ +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root) +{ + if (in_wl_tree(e, root)) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ", + e->pnum, e->ec, root); + ubi_dbg_dump_stack(); + return 1; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c index 06e33786078..4bee99ba7db 100644 --- a/drivers/net/3c501.c +++ b/drivers/net/3c501.c @@ -735,7 +735,6 @@ static void el_receive(struct net_device *dev) else { skb_reserve(skb,2); /* Force 16 byte alignment */ - skb->dev = dev; /* * The read increments through the bytes. 
The interrupt * handler will fix the pointer when it returns to diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c index 702bfb2a5e9..e985a85a562 100644 --- a/drivers/net/3c505.c +++ b/drivers/net/3c505.c @@ -615,7 +615,6 @@ static void receive_packet(struct net_device *dev, int len) if (test_and_set_bit(0, (void *) &adapter->dmaing)) printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction); - skb->dev = dev; adapter->current_dma.direction = 0; adapter->current_dma.length = rlen; adapter->current_dma.skb = skb; @@ -1026,7 +1025,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb) adapter->current_dma.start_time = jiffies; if ((unsigned long)(skb->data + nlen) >= MAX_DMA_ADDRESS || nlen != skb->len) { - memcpy(adapter->dma_buffer, skb->data, nlen); + skb_copy_from_linear_data(skb, adapter->dma_buffer, nlen); memset(adapter->dma_buffer+skb->len, 0, nlen-skb->len); target = isa_virt_to_bus(adapter->dma_buffer); } diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c index 54e1d5aebed..eed4299dc42 100644 --- a/drivers/net/3c507.c +++ b/drivers/net/3c507.c @@ -873,7 +873,6 @@ static void el16_rx(struct net_device *dev) } skb_reserve(skb,2); - skb->dev = dev; /* 'skb->data' points to the start of sk_buff data area. */ memcpy_fromio(skb_put(skb,pkt_len), data_frame + 10, pkt_len); diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index f791bf026e5..c7511c4d3b6 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -1091,7 +1091,6 @@ el3_rx(struct net_device *dev) printk("Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte */ /* 'skb->data' points to the start of sk_buff data area. */ diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index c307ce66145..290166d5e7d 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c @@ -1292,7 +1292,6 @@ static int corkscrew_rx(struct net_device *dev) printk("Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. */ insl(ioaddr + RX_FIFO, @@ -1363,7 +1362,6 @@ static int boomerang_rx(struct net_device *dev) copying to a properly sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 4)) != 0) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. 
*/ memcpy(skb_put(skb, pkt_len), diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c index 17d61eb0a7e..da1a22c1386 100644 --- a/drivers/net/3c523.c +++ b/drivers/net/3c523.c @@ -988,7 +988,6 @@ static void elmc_rcv_int(struct net_device *dev) rbd->status = 0; skb = (struct sk_buff *) dev_alloc_skb(totlen + 2); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte alignment */ skb_put(skb,totlen); eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0); @@ -1146,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev) if (len != skb->len) memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN); - memcpy((char *) p->xmit_cbuffs[p->xmit_count], (char *) (skb->data), skb->len); + skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len); #if (NUM_XMIT_BUFFS == 1) #ifdef NO_NOPCOMMANDS diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index 6c7437e60bd..c7b571be20e 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -1189,7 +1189,6 @@ static void mc32_rx_ring(struct net_device *dev) } skb->protocol=eth_type_trans(skb,dev); - skb->dev=dev; dev->last_rx = jiffies; lp->net_stats.rx_packets++; lp->net_stats.rx_bytes += length; diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index b406ecfa726..80924f76dee 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2414,7 +2414,6 @@ static int vortex_rx(struct net_device *dev) printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. */ if (vp->bus_master && @@ -2491,7 +2490,6 @@ boomerang_rx(struct net_device *dev) /* Check if the packet is long enough to just accept without copying to a properly sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); /* 'skb_put()' points to the start of sk_buff data area. 
*/ diff --git a/drivers/net/7990.c b/drivers/net/7990.c index 1b3d11ed6cf..d396f996af5 100644 --- a/drivers/net/7990.c +++ b/drivers/net/7990.c @@ -331,7 +331,6 @@ static int lance_rx (struct net_device *dev) return 0; } - skb->dev = dev; skb_reserve (skb, 2); /* 16 byte align */ skb_put (skb, len); /* make room */ eth_copy_and_sum(skb, @@ -568,7 +567,7 @@ int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) if (skb->len < ETH_ZLEN) memset((char *)&ib->tx_buf[entry][0], 0, ETH_ZLEN); - memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen); + skb_copy_from_linear_data(skb, &ib->tx_buf[entry][0], skblen); /* Now, give the packet to the lance */ ib->btx_ring [entry].tmd1_bits = (LE_T1_POK|LE_T1_OWN); diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 12c8453f44b..e8c9f27817b 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -573,7 +573,6 @@ rx_status_loop: } skb_reserve(new_skb, RX_OFFSET); - new_skb->dev = dev; pci_unmap_single(cp->pdev, mapping, buflen, PCI_DMA_FROMDEVICE); @@ -807,7 +806,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) flags |= LargeSend | ((mss & MSSMask) << MSSShift); else if (skb->ip_summed == CHECKSUM_PARTIAL) { - const struct iphdr *ip = skb->nh.iph; + const struct iphdr *ip = ip_hdr(skb); if (ip->protocol == IPPROTO_TCP) flags |= IPCS | TCPCS; else if (ip->protocol == IPPROTO_UDP) @@ -826,7 +825,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) u32 first_len, first_eor; dma_addr_t first_mapping; int frag, first_entry = entry; - const struct iphdr *ip = skb->nh.iph; + const struct iphdr *ip = ip_hdr(skb); /* We must give this initial chunk to the device last. * Otherwise we could race with the device. @@ -1082,7 +1081,6 @@ static int cp_refill_rx (struct cp_private *cp) if (!skb) goto err_out; - skb->dev = cp->dev; skb_reserve(skb, RX_OFFSET); mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz, diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index 99304b2aa86..a844b1fe2dc 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -1904,10 +1904,10 @@ static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring, u32 left = RX_BUF_LEN - offset; if (size > left) { - memcpy(skb->data, ring + offset, left); - memcpy(skb->data+left, ring, size - left); + skb_copy_to_linear_data(skb, ring + offset, left); + skb_copy_to_linear_data_offset(skb, left, ring, size - left); } else - memcpy(skb->data, ring + offset, size); + skb_copy_to_linear_data(skb, ring + offset, size); } #endif @@ -2013,7 +2013,6 @@ no_early_rx: skb = dev_alloc_skb (pkt_size + 2); if (likely(skb)) { - skb->dev = dev; skb_reserve (skb, 2); /* 16 byte align the IP fields. 
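The recurring one-line removals of "skb->dev = dev;" in these receive paths all rest on the same fact: eth_type_trans() already stores the device in skb->dev, so assigning it by hand first is redundant. A representative RX fragment after the cleanup (buf, pkt_len and dev stand in for the driver's own variables):

skb = dev_alloc_skb(pkt_len + 2);
if (skb) {
        skb_reserve(skb, 2);                       /* align the IP header */
        memcpy(skb_put(skb, pkt_len), buf, pkt_len);
        skb->protocol = eth_type_trans(skb, dev);  /* also sets skb->dev */
        netif_rx(skb);
        dev->last_rx = jiffies;
}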
*/ #if RX_BUF_IDX == 3 wrap_copy(skb, rx_ring, ring_offset+4, pkt_size); diff --git a/drivers/net/82596.c b/drivers/net/82596.c index 640d7ca2ebc..3ff1155459a 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -830,7 +830,6 @@ memory_squeeze: lp->stats.rx_dropped++; } else { - skb->dev = dev; if (!rx_in_place) { /* 16 byte align the data fields */ skb_reserve(skb, 2); diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 33af833667d..58527322a39 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -206,7 +206,7 @@ obj-$(CONFIG_TR) += tokenring/ obj-$(CONFIG_WAN) += wan/ obj-$(CONFIG_ARCNET) += arcnet/ obj-$(CONFIG_NET_PCMCIA) += pcmcia/ -obj-$(CONFIG_NET_RADIO) += wireless/ +obj-y += wireless/ obj-$(CONFIG_NET_TULIP) += tulip/ obj-$(CONFIG_HAMRADIO) += hamradio/ obj-$(CONFIG_IRDA) += irda/ diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c index d76548e7535..1226cbba045 100644 --- a/drivers/net/a2065.c +++ b/drivers/net/a2065.c @@ -320,7 +320,6 @@ static int lance_rx (struct net_device *dev) return 0; } - skb->dev = dev; skb_reserve (skb, 2); /* 16 byte align */ skb_put (skb, len); /* make room */ eth_copy_and_sum(skb, @@ -599,7 +598,7 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) ib->btx_ring [entry].length = (-len) | 0xf000; ib->btx_ring [entry].misc = 0; - memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen); + skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen); /* Clear the slack of the packet, do I need this? */ if (len != skblen) diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 7138e0e025b..7122b7ba8d6 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2027,7 +2027,6 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) */ csum = retdesc->tcp_udp_csum; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); /* diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index 962c954c2d5..675fe918421 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -798,9 +798,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget) pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index], lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); skb_put(skb, pkt_len); - skb->dev = dev; lp->rx_skbuff[rx_index] = new_skb; - new_skb->dev = dev; lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev, new_skb->data, lp->rx_buff_len-2, @@ -926,9 +924,7 @@ static int amd8111e_rx(struct net_device *dev) pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index], lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); skb_put(skb, pkt_len); - skb->dev = dev; lp->rx_skbuff[rx_index] = new_skb; - new_skb->dev = dev; lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev, new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE); diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index dba5e516545..da6ffa8cd81 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -853,9 +853,9 @@ static void cops_rx(struct net_device *dev) return; } - skb->mac.raw = skb->data; /* Point to entire packet. */ + skb_reset_mac_header(skb); /* Point to entire packet. */ skb_pull(skb,3); - skb->h.raw = skb->data; /* Point to data (Skip header). */ + skb_reset_transport_header(skb); /* Point to data (Skip header). */ /* Update the counters. 
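cops.c above switches from poking skb->mac.raw and skb->h.raw to the new reset helpers. In the sk_buff layout of this kernel generation those raw pointers become dedicated header fields, and the helpers record that the header in question starts at the current skb->data; on builds where the fields are plain pointers they reduce to roughly this (simplified, not the verbatim skbuff.h text):

static inline void skb_reset_mac_header(struct sk_buff *skb)
{
        skb->mac_header = skb->data;
}

static inline void skb_reset_transport_header(struct sk_buff *skb)
{
        skb->transport_header = skb->data;
}

The conversion pattern is then: reset the MAC header while skb->data still points at the link-level header, skb_pull() past it, and reset the transport header once skb->data points at the payload, exactly the sequence the AppleTalk drivers follow.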
*/ lp->stats.rx_packets++; diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 2ea44ce4981..6a6cbd331a1 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -770,13 +770,13 @@ static int sendup_buffer (struct net_device *dev) skb->data[0] = dnode; skb->data[1] = snode; skb->data[2] = llaptype; - skb->mac.raw = skb->data; /* save pointer to llap header */ + skb_reset_mac_header(skb); /* save pointer to llap header */ skb_pull(skb,3); /* copy ddp(s,e)hdr + contents */ - memcpy(skb->data,(void*)ltdmabuf,len); + skb_copy_to_linear_data(skb, ltdmabuf, len); - skb->h.raw = skb->data; + skb_reset_transport_header(skb); stats->rx_packets++; stats->rx_bytes+=skb->len; @@ -917,13 +917,14 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev) int i; struct lt_sendlap cbuf; + unsigned char *hdr; cbuf.command = LT_SENDLAP; cbuf.dnode = skb->data[0]; cbuf.laptype = skb->data[2]; skb_pull(skb,3); /* skip past LLAP header */ cbuf.length = skb->len; /* this is host order */ - skb->h.raw=skb->data; + skb_reset_transport_header(skb); if(debug & DEBUG_UPPER) { printk("command "); @@ -932,11 +933,13 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev) printk("\n"); } - do_write(dev,&cbuf,sizeof(cbuf),skb->h.raw,skb->len); + hdr = skb_transport_header(skb); + do_write(dev, &cbuf, sizeof(cbuf), hdr, skb->len); if(debug & DEBUG_UPPER) { printk("sent %d ddp bytes\n",skb->len); - for(i=0;i<skb->len;i++) printk("%02x ",skb->h.raw[i]); + for (i = 0; i < skb->len; i++) + printk("%02x ", hdr[i]); printk("\n"); } diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c index 6318814a11a..e0a18e7c73c 100644 --- a/drivers/net/arcnet/arc-rawmode.c +++ b/drivers/net/arcnet/arc-rawmode.c @@ -110,7 +110,7 @@ static void rx(struct net_device *dev, int bufnum, pkt = (struct archdr *) skb->data; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, ARC_HDR_SIZE); /* up to sizeof(pkt->soft) has already been copied from the card */ diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 83004fdab0a..681e20b8466 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -519,9 +519,12 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev, * real header when we do rebuild_header. */ *(uint16_t *) skb_push(skb, 2) = type; - if (skb->nh.raw - skb->mac.raw != 2) + /* + * XXX: Why not use skb->mac_len? + */ + if (skb->network_header - skb->mac_header != 2) BUGMSG(D_NORMAL, "arcnet_header: Yikes! diff (%d) is not 2!\n", - (int)(skb->nh.raw - skb->mac.raw)); + (int)(skb->network_header - skb->mac_header)); return -2; /* return error -- can't transmit yet! */ } else { @@ -554,11 +557,13 @@ static int arcnet_rebuild_header(struct sk_buff *skb) unsigned short type; uint8_t daddr=0; struct ArcProto *proto; - - if (skb->nh.raw - skb->mac.raw != 2) { + /* + * XXX: Why not use skb->mac_len? + */ + if (skb->network_header - skb->mac_header != 2) { BUGMSG(D_NORMAL, - "rebuild_header: shouldn't be here! (hdrsize=%d)\n", - (int)(skb->nh.raw - skb->mac.raw)); + "rebuild_header: shouldn't be here! 
(hdrsize=%d)\n", + (int)(skb->network_header - skb->mac_header)); return 0; } type = *(uint16_t *) skb_pull(skb, 2); diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c index 66485585ab3..cc4610db639 100644 --- a/drivers/net/arcnet/capmode.c +++ b/drivers/net/arcnet/capmode.c @@ -122,10 +122,8 @@ static void rx(struct net_device *dev, int bufnum, } skb_put(skb, length + ARC_HDR_SIZE + sizeof(int)); skb->dev = dev; - - pkt = (struct archdr *) skb->data; - - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); + pkt = (struct archdr *)skb_mac_header(skb); skb_pull(skb, ARC_HDR_SIZE); /* up to sizeof(pkt->soft) has already been copied from the card */ @@ -270,13 +268,13 @@ static int ack_tx(struct net_device *dev, int acked) skb_put(ackskb, length + ARC_HDR_SIZE ); ackskb->dev = dev; - ackpkt = (struct archdr *) ackskb->data; - - ackskb->mac.raw = ackskb->data; + skb_reset_mac_header(ackskb); + ackpkt = (struct archdr *)skb_mac_header(ackskb); /* skb_pull(ackskb, ARC_HDR_SIZE); */ - memcpy(ackpkt, lp->outgoing.skb->data, ARC_HDR_SIZE+sizeof(struct arc_cap)); + skb_copy_from_linear_data(lp->outgoing.skb, ackpkt, + ARC_HDR_SIZE + sizeof(struct arc_cap)); ackpkt->soft.cap.proto=0; /* using protocol 0 for acknowledge */ ackpkt->soft.cap.mes.ack=acked; diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c index 6d6c69f036e..2de8877ece2 100644 --- a/drivers/net/arcnet/rfc1051.c +++ b/drivers/net/arcnet/rfc1051.c @@ -94,7 +94,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev) int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE; /* Pull off the arcnet header. */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, hdr_size); if (pkt->hard.dest == 0) diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c index bee34226abf..460a095000c 100644 --- a/drivers/net/arcnet/rfc1201.c +++ b/drivers/net/arcnet/rfc1201.c @@ -96,7 +96,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev) int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE; /* Pull off the arcnet header. 
*/ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, hdr_size); if (pkt->hard.dest == 0) diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c index 9dfc09b181c..a0e68e71853 100644 --- a/drivers/net/ariadne.c +++ b/drivers/net/ariadne.c @@ -743,7 +743,6 @@ static int ariadne_rx(struct net_device *dev) } - skb->dev = dev; skb_reserve(skb,2); /* 16 byte align */ skb_put(skb,pkt_len); /* Make room */ eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0); diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c index ddd12d44ff2..8f0d7ce503c 100644 --- a/drivers/net/arm/am79c961a.c +++ b/drivers/net/arm/am79c961a.c @@ -526,7 +526,6 @@ am79c961_rx(struct net_device *dev, struct dev_priv *priv) skb = dev_alloc_skb(len + 2); if (skb) { - skb->dev = dev; skb_reserve(skb, 2); am_readbuffer(dev, pktaddr, skb_put(skb, len), len); diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c index 1621b8fe35c..152fa7a042b 100644 --- a/drivers/net/arm/at91_ether.c +++ b/drivers/net/arm/at91_ether.c @@ -858,7 +858,6 @@ static void at91ether_rx(struct net_device *dev) skb_reserve(skb, 2); memcpy(skb_put(skb, pktlen), p_recv, pktlen); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); dev->last_rx = jiffies; lp->stats.rx_bytes += pktlen; diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c index dd698b033a6..2438c5bff23 100644 --- a/drivers/net/arm/ep93xx_eth.c +++ b/drivers/net/arm/ep93xx_eth.c @@ -255,7 +255,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget) skb = dev_alloc_skb(length + 2); if (likely(skb != NULL)) { - skb->dev = dev; skb_reserve(skb, 2); dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr, length, DMA_FROM_DEVICE); diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c index a2921882eba..f075cebe84a 100644 --- a/drivers/net/arm/ether1.c +++ b/drivers/net/arm/ether1.c @@ -875,7 +875,6 @@ ether1_recv_done (struct net_device *dev) skb = dev_alloc_skb (length + 2); if (skb) { - skb->dev = dev; skb_reserve (skb, 2); ether1_readbuffer (dev, skb_put (skb, length), rbd.rbd_bufl, length); diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c index 841178343a0..32da2eb9bce 100644 --- a/drivers/net/arm/ether3.c +++ b/drivers/net/arm/ether3.c @@ -661,7 +661,6 @@ if (next_ptr < RX_START || next_ptr >= RX_END) { if (skb) { unsigned char *buf; - skb->dev = dev; skb_reserve(skb, 2); buf = skb_put(skb, length); ether3_readbuffer(dev, buf + 12, length - 12); diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c index 56ae8babd91..bed8e0ebaf1 100644 --- a/drivers/net/at1700.c +++ b/drivers/net/at1700.c @@ -768,7 +768,6 @@ net_rx(struct net_device *dev) lp->stats.rx_dropped++; break; } - skb->dev = dev; skb_reserve(skb,2); insw(ioaddr + DATAPORT, skb_put(skb,pkt_len), (pkt_len + 1) >> 1); diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c index 4e3bf6a1f22..3d87bd2b419 100644 --- a/drivers/net/atari_bionet.c +++ b/drivers/net/atari_bionet.c @@ -453,7 +453,8 @@ bionet_send_packet(struct sk_buff *skb, struct net_device *dev) { stdma_lock(bionet_intr, NULL); local_irq_restore(flags); if( !STRAM_ADDR(buf+length-1) ) { - memcpy(nic_packet->buffer, skb->data, length); + skb_copy_from_linear_data(skb, nic_packet->buffer, + length); buf = (unsigned long)&((struct nic_pkt_s *)phys_nic_packet)->buffer; } @@ -544,13 +545,13 @@ bionet_poll_rx(struct net_device *dev) { break; } - skb->dev = dev; skb_reserve( skb, 2 ); /* 16 Byte align */ skb_put( skb, pkt_len ); /* make room 
*/ /* 'skb->data' points to the start of sk_buff data area. */ - memcpy(skb->data, nic_packet->buffer, pkt_len); + skb_copy_to_linear_data(skb, nic_packet->buffer, + pkt_len); skb->protocol = eth_type_trans( skb, dev ); netif_rx(skb); dev->last_rx = jiffies; diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c index 3b543614928..54714409a09 100644 --- a/drivers/net/atari_pamsnet.c +++ b/drivers/net/atari_pamsnet.c @@ -717,7 +717,8 @@ pamsnet_send_packet(struct sk_buff *skb, struct net_device *dev) { local_irq_restore(flags); if( !STRAM_ADDR(buf+length-1) ) { - memcpy(nic_packet->buffer, skb->data, length); + skb_copy_from_linear_data(skb, nic_packet->buffer, + length); buf = (unsigned long)phys_nic_packet; } @@ -792,7 +793,8 @@ pamsnet_poll_rx(struct net_device *dev) { /* 'skb->data' points to the start of sk_buff data area. */ - memcpy(skb->data, nic_packet->buffer, pkt_len); + skb_copy_to_linear_data(skb, nic_packet->buffer, + pkt_len); netif_rx(skb); dev->last_rx = jiffies; lp->stats.rx_packets++; diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c index 7e37ac86a69..dfa8b9ba4c8 100644 --- a/drivers/net/atarilance.c +++ b/drivers/net/atarilance.c @@ -1047,7 +1047,6 @@ static int lance_rx( struct net_device *dev ) pkt_len ); } - skb->dev = dev; skb_reserve( skb, 2 ); /* 16 byte align */ skb_put( skb, pkt_len ); /* Make room */ lp->memcpy_f( skb->data, PKTBUF_ADDR(head), pkt_len ); diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c index 8606eac5bec..4b1d4d153ec 100644 --- a/drivers/net/atl1/atl1_main.c +++ b/drivers/net/atl1/atl1_main.c @@ -408,7 +408,6 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter, static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) { struct atl1_rfd_ring *rfd_ring = &adapter->rfd_ring; - struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct page *page; unsigned long offset; @@ -444,7 +443,6 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) * the 14 byte MAC header is removed */ skb_reserve(skb, NET_IP_ALIGN); - skb->dev = netdev; buffer_info->alloced = 1; buffer_info->skb = skb; @@ -1296,19 +1294,21 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb, } if (skb->protocol == ntohs(ETH_P_IP)) { - skb->nh.iph->tot_len = 0; - skb->nh.iph->check = 0; - skb->h.th->check = - ~csum_tcpudp_magic(skb->nh.iph->saddr, - skb->nh.iph->daddr, 0, - IPPROTO_TCP, 0); - ipofst = skb->nh.raw - skb->data; + struct iphdr *iph = ip_hdr(skb); + + iph->tot_len = 0; + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); + ipofst = skb_network_offset(skb); if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */ tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT; - tso->tsopl |= (skb->nh.iph->ihl & + tso->tsopl |= (iph->ihl & CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT; - tso->tsopl |= ((skb->h.th->doff << 2) & + tso->tsopl |= (tcp_hdrlen(skb) & TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT; tso->tsopl |= (skb_shinfo(skb)->gso_size & TSO_PARAM_MSS_MASK) << TSO_PARAM_MSS_SHIFT; @@ -1327,8 +1327,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb, u8 css, cso; if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { - cso = skb->h.raw - skb->data; - css = (skb->h.raw + skb->csum_offset) - skb->data; + cso = skb_transport_offset(skb); + css = cso + skb->csum_offset; if (unlikely(cso & 0x1)) { printk(KERN_DEBUG "%s: payload offset != even number\n", atl1_driver_name); @@ -1370,8 
+1370,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter, if (tcp_seg) { /* TSO/GSO */ - proto_hdr_len = - ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); buffer_info->length = proto_hdr_len; page = virt_to_page(skb->data); offset = (unsigned long)skb->data & ~PAGE_MASK; @@ -1563,8 +1562,8 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev) mss = skb_shinfo(skb)->gso_size; if (mss) { if (skb->protocol == htons(ETH_P_IP)) { - proto_hdr_len = ((skb->h.raw - skb->data) + - (skb->h.th->doff << 2)); + proto_hdr_len = (skb_transport_offset(skb) + + tcp_hdrlen(skb)); if (unlikely(proto_hdr_len > len)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 2d306fcb7f3..18aba838c1f 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -793,7 +793,6 @@ static void net_rx(struct net_device *dev) lp->stats.rx_dropped++; goto done; } - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ read_block(ioaddr, pkt_len, skb_put(skb,pkt_len), dev->if_port); diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 69ae229b680..d10fb80e9a6 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -1125,7 +1125,7 @@ static int au1000_tx(struct sk_buff *skb, struct net_device *dev) } pDB = aup->tx_db_inuse[aup->tx_head]; - memcpy((void *)pDB->vaddr, skb->data, skb->len); + skb_copy_from_linear_data(skb, pDB->vaddr, skb->len); if (skb->len < ETH_ZLEN) { for (i=skb->len; i<ETH_ZLEN; i++) { ((char *)pDB->vaddr)[i] = 0; @@ -1205,7 +1205,6 @@ static int au1000_rx(struct net_device *dev) aup->stats.rx_dropped++; continue; } - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte IP header align */ eth_copy_and_sum(skb, (unsigned char *)pDB->vaddr, frmlen, 0); diff --git a/drivers/net/b44.c b/drivers/net/b44.c index d742bfe2447..879a2fff474 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -825,12 +825,11 @@ static int b44_rx(struct b44 *bp, int budget) if (copy_skb == NULL) goto drop_it_no_recycle; - copy_skb->dev = bp->dev; skb_reserve(copy_skb, 2); skb_put(copy_skb, len); /* DMA sync done above, copy just the actual packet */ - memcpy(copy_skb->data, skb->data+bp->rx_offset, len); - + skb_copy_from_linear_data_offset(skb, bp->rx_offset, + copy_skb->data, len); skb = copy_skb; } skb->ip_summed = CHECKSUM_NONE; @@ -1007,7 +1006,8 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev) goto err_out; } - memcpy(skb_put(bounce_skb, len), skb->data, skb->len); + skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), + skb->len); dev_kfree_skb_any(skb); skb = bounce_skb; } diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c index c143304dcff..4612725965d 100644 --- a/drivers/net/bmac.c +++ b/drivers/net/bmac.c @@ -715,7 +715,6 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id) if (skb != NULL) { nb -= ETHERCRC; skb_put(skb, nb); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index e85f5ec48f9..f98a2205a09 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -1884,10 +1884,8 @@ bnx2_rx_int(struct bnx2 *bp, int budget) goto reuse_rx; /* aligned copy */ - memcpy(new_skb->data, - skb->data + bp->rx_offset - 2, - len + 2); - + skb_copy_from_linear_data_offset(skb, bp->rx_offset - 2, + new_skb->data, len + 2); skb_reserve(new_skb, 2); skb_put(new_skb, len); @@ -4513,6 +4511,7 @@ 
bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((mss = skb_shinfo(skb)->gso_size) && (skb->len > (bp->dev->mtu + ETH_HLEN))) { u32 tcp_opt_len, ip_tcp_len; + struct iphdr *iph; if (skb_header_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { @@ -4520,25 +4519,23 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - tcp_opt_len = ((skb->h.th->doff - 5) * 4); vlan_tag_flags |= TX_BD_FLAGS_SW_LSO; tcp_opt_len = 0; - if (skb->h.th->doff > 5) { - tcp_opt_len = (skb->h.th->doff - 5) << 2; - } - ip_tcp_len = (skb->nh.iph->ihl << 2) + sizeof(struct tcphdr); - - skb->nh.iph->check = 0; - skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len); - skb->h.th->check = - ~csum_tcpudp_magic(skb->nh.iph->saddr, - skb->nh.iph->daddr, - 0, IPPROTO_TCP, 0); - - if (tcp_opt_len || (skb->nh.iph->ihl > 5)) { - vlan_tag_flags |= ((skb->nh.iph->ihl - 5) + - (tcp_opt_len >> 2)) << 8; + if (tcp_hdr(skb)->doff > 5) + tcp_opt_len = tcp_optlen(skb); + + ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr); + + iph = ip_hdr(skb); + iph->check = 0; + iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len); + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); + if (tcp_opt_len || (iph->ihl > 5)) { + vlan_tag_flags |= ((iph->ihl - 5) + + (tcp_opt_len >> 2)) << 8; } } else diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 3fb354d9c51..7e03f41ae2c 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -884,8 +884,8 @@ static int ad_lacpdu_send(struct port *port) } skb->dev = slave->dev; - skb->mac.raw = skb->data; - skb->nh.raw = skb->data + ETH_HLEN; + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; skb->priority = TC_PRIO_CONTROL; @@ -928,8 +928,8 @@ static int ad_marker_send(struct port *port, struct marker *marker) skb_reserve(skb, 16); skb->dev = slave->dev; - skb->mac.raw = skb->data; - skb->nh.raw = skb->data + ETH_HLEN; + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = PKT_TYPE_LACPDU; marker_header = (struct marker_header *)skb_put(skb, length); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 217a2eedee0..92c3b6f6a8e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -104,10 +104,15 @@ struct arp_pkt { }; #pragma pack() +static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) +{ + return (struct arp_pkt *)skb_network_header(skb); +} + /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); -static inline u8 _simple_hash(u8 *hash_start, int hash_size) +static inline u8 _simple_hash(const u8 *hash_start, int hash_size) { int i; u8 hash = 0; @@ -613,7 +618,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip) static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -701,7 +706,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw; + struct arp_pkt *arp = arp_pkt(skb); struct 
slave *tx_slave = NULL; if (arp->op_code == __constant_htons(ARPOP_REPLY)) { @@ -890,8 +895,8 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) data = skb_put(skb, size); memcpy(data, &pkt, size); - skb->mac.raw = data; - skb->nh.raw = data + ETH_HLEN; + skb_reset_mac_header(skb); + skb->network_header = skb->mac_header + ETH_HLEN; skb->protocol = pkt.type; skb->priority = TC_PRIO_CONTROL; skb->dev = slave->dev; @@ -1263,10 +1268,10 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) int hash_size = 0; int do_tx_balance = 1; u32 hash_index = 0; - u8 *hash_start = NULL; + const u8 *hash_start = NULL; int res = 1; - skb->mac.raw = (unsigned char *)skb->data; + skb_reset_mac_header(skb); eth_data = eth_hdr(skb); /* make sure that the curr_active_slave and the slaves list do @@ -1280,15 +1285,18 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) } switch (ntohs(skb->protocol)) { - case ETH_P_IP: + case ETH_P_IP: { + const struct iphdr *iph = ip_hdr(skb); + if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) || - (skb->nh.iph->daddr == ip_bcast) || - (skb->nh.iph->protocol == IPPROTO_IGMP)) { + (iph->daddr == ip_bcast) || + (iph->protocol == IPPROTO_IGMP)) { do_tx_balance = 0; break; } - hash_start = (char*)&(skb->nh.iph->daddr); - hash_size = sizeof(skb->nh.iph->daddr); + hash_start = (char *)&(iph->daddr); + hash_size = sizeof(iph->daddr); + } break; case ETH_P_IPV6: if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) { @@ -1296,8 +1304,8 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - hash_start = (char*)&(skb->nh.ipv6h->daddr); - hash_size = sizeof(skb->nh.ipv6h->daddr); + hash_start = (char *)&(ipv6_hdr(skb)->daddr); + hash_size = sizeof(ipv6_hdr(skb)->daddr); break; case ETH_P_IPX: if (ipx_hdr(skb)->ipx_checksum != diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e4724d874e7..cea3783c92c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2524,7 +2524,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack (2 * sizeof(u32))))) goto out_unlock; - arp = skb->nh.arph; + arp = arp_hdr(skb); if (arp->ar_hln != dev->addr_len || skb->pkt_type == PACKET_OTHERHOST || skb->pkt_type == PACKET_LOOPBACK || @@ -3476,7 +3476,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb, struct net_device *bond_dev, int count) { struct ethhdr *data = (struct ethhdr *)skb->data; - struct iphdr *iph = skb->nh.iph; + struct iphdr *iph = ip_hdr(skb); u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl); int layer4_xor = 0; @@ -3640,9 +3640,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) read_lock_bh(&bond->lock); bond_for_each_slave(bond, slave, i) { - if (slave->dev->get_stats) { - sstats = slave->dev->get_stats(slave->dev); - + sstats = slave->dev->get_stats(slave->dev); + if (sstats) { stats->rx_packets += sstats->rx_packets; stats->rx_bytes += sstats->rx_bytes; stats->rx_errors += sstats->rx_errors; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index c8126484c2b..4aec747d9e4 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -1995,7 +1995,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, return -1; *skbref = skb; - skb->dev = cp->dev; skb_reserve(skb, swivel); p = skb->data; @@ -2822,10 +2821,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, ctrl = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) { - u64 
csum_start_off, csum_stuff_off; - - csum_start_off = (u64) (skb->h.raw - skb->data); - csum_stuff_off = csum_start_off + skb->csum_offset; + const u64 csum_start_off = skb_transport_offset(skb); + const u64 csum_stuff_off = csum_start_off + skb->csum_offset; ctrl = TX_DESC_CSUM_EN | CAS_BASE(TX_DESC_CSUM_START, csum_start_off) | @@ -2849,8 +2846,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, ctrl | TX_DESC_SOF, 0); entry = TX_DESC_NEXT(ring, entry); - memcpy(tx_tiny_buf(cp, ring, entry), skb->data + - len - tabort, tabort); + skb_copy_from_linear_data_offset(skb, len - tabort, + tx_tiny_buf(cp, ring, entry), tabort); mapping = tx_tiny_map(cp, ring, entry, tentry); cas_write_txd(cp, ring, entry, mapping, tabort, ctrl, (nr_frags == 0)); diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 326d4a66512..e4f874a70fe 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1062,7 +1062,7 @@ static inline struct sk_buff *get_packet(struct pci_dev *pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); - memcpy(skb->data, ce->skb->data, len); + skb_copy_from_linear_data(ce->skb, skb->data, len); pci_dma_sync_single_for_device(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), @@ -1379,12 +1379,11 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) } __skb_pull(skb, sizeof(*p)); - skb->dev = adapter->port[p->iff].dev; skb->dev->last_rx = jiffies; st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id()); st->rx_packets++; - skb->protocol = eth_type_trans(skb, skb->dev); + skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev); if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && skb->protocol == htons(ETH_P_IP) && (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { @@ -1866,14 +1865,14 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) ++st->tx_tso; - eth_type = skb->nh.raw - skb->data == ETH_HLEN ? + eth_type = skb_network_offset(skb) == ETH_HLEN ? CPL_ETH_II : CPL_ETH_II_VLAN; hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr)); hdr->opcode = CPL_TX_PKT_LSO; hdr->ip_csum_dis = hdr->l4_csum_dis = 0; - hdr->ip_hdr_words = skb->nh.iph->ihl; - hdr->tcp_hdr_words = skb->h.th->doff; + hdr->ip_hdr_words = ip_hdr(skb)->ihl; + hdr->tcp_hdr_words = tcp_hdr(skb)->doff; hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, skb_shinfo(skb)->gso_size)); hdr->len = htonl(skb->len - sizeof(*hdr)); @@ -1913,7 +1912,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!(adapter->flags & UDP_CSUM_CAPABLE) && skb->ip_summed == CHECKSUM_PARTIAL && - skb->nh.iph->protocol == IPPROTO_UDP) { + ip_hdr(skb)->protocol == IPPROTO_UDP) { if (unlikely(skb_checksum_help(skb))) { pr_debug("%s: unable to do udp checksum\n", dev->name); dev_kfree_skb_any(skb); @@ -1926,7 +1925,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((unlikely(!adapter->sge->espibug_skb[dev->if_port]))) { if (skb->protocol == htons(ETH_P_ARP) && - skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) { + arp_hdr(skb)->ar_op == htons(ARPOP_REQUEST)) { adapter->sge->espibug_skb[dev->if_port] = skb; /* We want to re-use this skb later. 
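The checksum and TSO hunks in this patch replace the old skb->nh.iph / skb->h.th unions with typed accessors such as ip_hdr(), tcp_hdr() and arp_hdr(). Several drivers now share the same pseudo-header preparation; factored out, it looks like the sketch below, whose body mirrors the atl1 and e1000 hunks (the function name is illustrative; assumes <linux/ip.h>, <linux/tcp.h> and <net/checksum.h>):

static void tso_prepare_pseudo_csum(struct sk_buff *skb)
{
        struct iphdr *iph = ip_hdr(skb);

        iph->tot_len = 0;
        iph->check = 0;
        tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
                                                 0, IPPROTO_TCP, 0);
}

tot_len and the IP checksum are zeroed because the hardware recomputes them for each generated segment; the TCP checksum is seeded with the pseudo-header sum so the device only has to add the payload.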
We * simply bump the reference count and it @@ -2096,10 +2095,14 @@ static void espibug_workaround_t204(unsigned long data) 0x0, 0x7, 0x43, 0x0, 0x0, 0x0 }; - memcpy(skb->data + sizeof(struct cpl_tx_pkt), - ch_mac_addr, ETH_ALEN); - memcpy(skb->data + skb->len - 10, - ch_mac_addr, ETH_ALEN); + skb_copy_to_linear_data_offset(skb, + sizeof(struct cpl_tx_pkt), + ch_mac_addr, + ETH_ALEN); + skb_copy_to_linear_data_offset(skb, + skb->len - 10, + ch_mac_addr, + ETH_ALEN); skb->cb[0] = 0xff; } @@ -2126,10 +2129,14 @@ static void espibug_workaround(unsigned long data) if (!skb->cb[0]) { u8 ch_mac_addr[ETH_ALEN] = {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; - memcpy(skb->data + sizeof(struct cpl_tx_pkt), - ch_mac_addr, ETH_ALEN); - memcpy(skb->data + skb->len - 10, ch_mac_addr, - ETH_ALEN); + skb_copy_to_linear_data_offset(skb, + sizeof(struct cpl_tx_pkt), + ch_mac_addr, + ETH_ALEN); + skb_copy_to_linear_data_offset(skb, + skb->len - 10, + ch_mac_addr, + ETH_ALEN); skb->cb[0] = 0xff; } diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index 8eb57127600..5bdf5ca85a6 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -1348,7 +1348,8 @@ e100_rx(struct net_device *dev) #ifdef ETHDEBUG printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n", - skb->head, skb->data, skb->tail, skb->end); + skb->head, skb->data, skb_tail_pointer(skb), + skb_end_pointer(skb)); printk("copying packet to 0x%x.\n", skb_data_ptr); #endif @@ -1375,7 +1376,6 @@ e100_rx(struct net_device *dev) myNextRxDesc->descr.buf = L1_CACHE_ALIGN(virt_to_phys(myNextRxDesc->skb->data)); } - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); /* Send the packet to the upper layers */ diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index 4612f71a710..9774bb1b3e8 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -1004,7 +1004,6 @@ skip_this_frame: return; } skb_reserve(skb, 2); /* longword align L3 header */ - skb->dev = dev; if (bp + length > lp->end_dma_buff) { int semi_cnt = lp->end_dma_buff - bp; @@ -1702,7 +1701,6 @@ net_rx(struct net_device *dev) return; } skb_reserve(skb, 2); /* longword align L3 header */ - skb->dev = dev; readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1); if (length & 1) diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index 199e5066acf..ebcf35e4cf5 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -783,7 +783,7 @@ static int do_trace(struct t3cdev *dev, struct sk_buff *skb) skb->protocol = htons(0xffff); skb->dev = dev->lldev; skb_pull(skb, sizeof(*p)); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); netif_receive_skb(skb); return 0; } diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 027ab2c3825..3666586a483 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -661,7 +661,7 @@ static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp) if (skb) { __skb_put(skb, IMMED_PKT_SIZE); - memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE); + skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE); } return skb; } @@ -897,11 +897,11 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, d->flit[2] = 0; cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); - eth_type = skb->nh.raw - skb->data == ETH_HLEN ? + eth_type = skb_network_offset(skb) == ETH_HLEN ? 
CPL_ETH_II : CPL_ETH_II_VLAN; tso_info |= V_LSO_ETH_TYPE(eth_type) | - V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) | - V_LSO_TCPHDR_WORDS(skb->h.th->doff); + V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) | + V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff); hdr->lso_info = htonl(tso_info); flits = 3; } else { @@ -913,7 +913,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, if (skb->len <= WR_LEN - sizeof(*cpl)) { q->sdesc[pidx].skb = NULL; if (!skb->data_len) - memcpy(&d->flit[2], skb->data, skb->len); + skb_copy_from_linear_data(skb, &d->flit[2], + skb->len); else skb_copy_bits(skb, 0, &d->flit[2], skb->len); @@ -1319,16 +1320,19 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, /* Only TX_DATA builds SGLs */ from = (struct work_request_hdr *)skb->data; - memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from)); + memcpy(&d->flit[1], &from[1], + skb_transport_offset(skb) - sizeof(*from)); - flits = (skb->h.raw - skb->data) / 8; + flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw, + sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), + skb->tail - skb->transport_header, adap->pdev); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); skb->destructor = deferred_unmap_destructor; - ((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw; + ((struct unmap_info *)skb->cb)->len = (skb->tail - + skb->transport_header); } write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, @@ -1349,8 +1353,8 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb) if (skb->len <= WR_LEN && cnt == 0) return 1; /* packet fits as immediate data */ - flits = (skb->h.raw - skb->data) / 8; /* headers */ - if (skb->tail != skb->h.raw) + flits = skb_transport_offset(skb) / 8; /* headers */ + if (skb->tail != skb->transport_header) cnt++; return flits_to_desc(flits + sgl_len(cnt)); } @@ -1620,7 +1624,9 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq, unsigned int gather_idx) { rq->offload_pkts++; - skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (rq->polling) { rx_gather[gather_idx++] = skb; @@ -1684,9 +1690,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, struct port_info *pi; skb_pull(skb, sizeof(*p) + pad); - skb->dev = adap->port[p->iff]; skb->dev->last_rx = jiffies; - skb->protocol = eth_type_trans(skb, skb->dev); + skb->protocol = eth_type_trans(skb, adap->port[p->iff]); pi = netdev_priv(skb->dev); if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff && !p->fragment) { @@ -1717,11 +1722,11 @@ static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p, { skb->len = len; if (len <= SKB_DATA_SIZE) { - memcpy(skb->data, p->va, len); + skb_copy_to_linear_data(skb, p->va, len); skb->tail += len; put_page(p->frag.page); } else { - memcpy(skb->data, p->va, SKB_DATA_SIZE); + skb_copy_to_linear_data(skb, p->va, SKB_DATA_SIZE); skb_shinfo(skb)->frags[0].page = p->frag.page; skb_shinfo(skb)->frags[0].page_offset = p->frag.page_offset + SKB_DATA_SIZE; @@ -1767,7 +1772,7 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl, __skb_put(skb, len); pci_dma_sync_single_for_cpu(adap->pdev, mapping, len, PCI_DMA_FROMDEVICE); - memcpy(skb->data, sd->t.skb->data, len); + skb_copy_from_linear_data(sd->t.skb, skb->data, len); 
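get_packet() above duplicates the linear part of one skb into another through skb_copy_from_linear_data(). This helper family, used throughout the patch, consists of direction-explicit wrappers around memcpy() on skb->data; their likely shape (simplified from skbuff.h, with the skb argument always first):

static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
                                             void *to, const unsigned int len)
{
        memcpy(to, skb->data, len);
}

static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
                                                    const int offset, void *to,
                                                    const unsigned int len)
{
        memcpy(to, skb->data + offset, len);
}

static inline void skb_copy_to_linear_data(struct sk_buff *skb,
                                           const void *from,
                                           const unsigned int len)
{
        memcpy(skb->data, from, len);
}

Encoding the direction in the name removes the memcpy argument-order slips these conversions guard against, and gives the skb layer one place to add checks later.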
pci_dma_sync_single_for_device(adap->pdev, mapping, len, PCI_DMA_FROMDEVICE); } else if (!drop_thres) diff --git a/drivers/net/de600.c b/drivers/net/de600.c index e547ce14eef..dae97b860da 100644 --- a/drivers/net/de600.c +++ b/drivers/net/de600.c @@ -359,7 +359,6 @@ static void de600_rx_intr(struct net_device *dev) } /* else */ - skb->dev = dev; skb_reserve(skb,2); /* Align */ /* 'skb->data' points to the start of sk_buff data area. */ diff --git a/drivers/net/de620.c b/drivers/net/de620.c index b6ad0cb5055..dc489242617 100644 --- a/drivers/net/de620.c +++ b/drivers/net/de620.c @@ -697,7 +697,6 @@ static int de620_rx_intr(struct net_device *dev) } else { /* Yep! Go get it! */ skb_reserve(skb,2); /* Align */ - skb->dev = dev; /* skb->data points to the start of sk_buff data area */ buffer = skb_put(skb,size); /* copy the packet into the buffer */ diff --git a/drivers/net/declance.c b/drivers/net/declance.c index 9f7e1db8ce6..95d854e2295 100644 --- a/drivers/net/declance.c +++ b/drivers/net/declance.c @@ -616,7 +616,6 @@ static int lance_rx(struct net_device *dev) } lp->stats.rx_bytes += len; - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c index 07d2731c1aa..571d82f8008 100644 --- a/drivers/net/defxx.c +++ b/drivers/net/defxx.c @@ -3091,13 +3091,13 @@ static void dfx_rcv_queue_process( { /* Receive buffer allocated, pass receive packet up */ - memcpy(skb->data, p_buff + RCV_BUFF_K_PADDING, pkt_len+3); + skb_copy_to_linear_data(skb, + p_buff + RCV_BUFF_K_PADDING, + pkt_len + 3); } skb_reserve(skb,3); /* adjust data field so that it points to FC byte */ skb_put(skb, pkt_len); /* pass up packet length, NOT including CRC */ - skb->dev = bp->dev; /* pass up device pointer */ - skb->protocol = fddi_type_trans(skb, bp->dev); bp->rcv_total_bytes += skb->len; netif_rx(skb); diff --git a/drivers/net/depca.c b/drivers/net/depca.c index f3807aaf10a..183497020bf 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -1044,7 +1044,6 @@ static int depca_rx(struct net_device *dev) unsigned char *buf; skb_reserve(skb, 2); /* 16 byte align the IP header */ buf = skb_put(skb, pkt_len); - skb->dev = dev; if (entry < lp->rx_old) { /* Wrapped buffer */ len = (lp->rxRingMask - lp->rx_old + 1) * RX_BUFF_SZ; memcpy_fromio(buf, lp->rx_buff[lp->rx_old], len); diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c index a79520295fd..df62c0232f3 100644 --- a/drivers/net/dgrs.c +++ b/drivers/net/dgrs.c @@ -503,7 +503,6 @@ dgrs_rcv_frame( /* discarding the frame */ goto out; } - skb->dev = devN; skb_reserve(skb, 2); /* Align IP header */ again: @@ -742,7 +741,7 @@ static int dgrs_start_xmit(struct sk_buff *skb, struct net_device *devN) } amt = min_t(unsigned int, len, rbdp->size - count); - memcpy( (char *) S2H(rbdp->buf) + count, skb->data + i, amt); + skb_copy_from_linear_data_offset(skb, i, S2H(rbdp->buf) + count, amt); i += amt; count += amt; len -= amt; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 9d446a0fe0b..74ec64a1625 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -504,7 +504,6 @@ rio_timer (unsigned long data) break; } np->rx_skbuff[entry] = skb; - skb->dev = dev; /* 16 byte align the IP header */ skb_reserve (skb, 2); np->rx_ring[entry].fraginfo = @@ -575,7 +574,6 @@ alloc_list (struct net_device *dev) dev->name); break; } - skb->dev = dev; /* Mark as being used by this device. */ skb_reserve (skb, 2); /* 16 byte align the IP header. 
*/ /* Rubicon now supports 40 bits of addressing space. */ np->rx_ring[i].fraginfo = @@ -866,7 +864,6 @@ receive_packet (struct net_device *dev) DMA_48BIT_MASK, np->rx_buf_sz, PCI_DMA_FROMDEVICE); - skb->dev = dev; /* 16 byte align the IP header */ skb_reserve (skb, 2); eth_copy_and_sum (skb, @@ -910,7 +907,6 @@ receive_packet (struct net_device *dev) break; } np->rx_skbuff[entry] = skb; - skb->dev = dev; /* 16 byte align the IP header */ skb_reserve (skb, 2); np->rx_ring[entry].fraginfo = diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 615d2b14efa..8cc1174e7f6 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -954,7 +954,6 @@ dm9000_rx(struct net_device *dev) /* Move data from DM9000 */ if (GoodPacket && ((skb = dev_alloc_skb(RxLen + 4)) != NULL)) { - skb->dev = dev; skb_reserve(skb, 2); rdptr = (u8 *) skb_put(skb, RxLen - 4); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 0cefef5e3f0..4d0e0aea72b 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1769,7 +1769,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) /* Align, init, and map the RFD. */ skb_reserve(rx->skb, NET_IP_ALIGN); - memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd)); + skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd)); rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index b28a915bd98..9267f16b1b3 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -409,25 +409,21 @@ e1000_release_hw_control(struct e1000_adapter *adapter) { uint32_t ctrl_ext; uint32_t swsm; - uint32_t extcnf; /* Let firmware taken over control of h/w */ switch (adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - case e1000_80003es2lan: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); - break; case e1000_82573: swsm = E1000_READ_REG(&adapter->hw, SWSM); E1000_WRITE_REG(&adapter->hw, SWSM, swsm & ~E1000_SWSM_DRV_LOAD); + break; + case e1000_82571: + case e1000_82572: + case e1000_80003es2lan: case e1000_ich8lan: - extcnf = E1000_READ_REG(&adapter->hw, CTRL_EXT); + ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - extcnf & ~E1000_CTRL_EXT_DRV_LOAD); + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); break; default: break; @@ -450,26 +446,21 @@ e1000_get_hw_control(struct e1000_adapter *adapter) { uint32_t ctrl_ext; uint32_t swsm; - uint32_t extcnf; /* Let firmware know the driver has taken over */ switch (adapter->hw.mac_type) { - case e1000_82571: - case e1000_82572: - case e1000_80003es2lan: - ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); - E1000_WRITE_REG(&adapter->hw, CTRL_EXT, - ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); - break; case e1000_82573: swsm = E1000_READ_REG(&adapter->hw, SWSM); E1000_WRITE_REG(&adapter->hw, SWSM, swsm | E1000_SWSM_DRV_LOAD); break; + case e1000_82571: + case e1000_82572: + case e1000_80003es2lan: case e1000_ich8lan: - extcnf = E1000_READ_REG(&adapter->hw, EXTCNF_CTRL); - E1000_WRITE_REG(&adapter->hw, EXTCNF_CTRL, - extcnf | E1000_EXTCNF_CTRL_SWFLAG); + ctrl_ext = E1000_READ_REG(&adapter->hw, CTRL_EXT); + E1000_WRITE_REG(&adapter->hw, CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); break; default: break; @@ -522,14 +513,15 @@ e1000_release_manageability(struct e1000_adapter *adapter) } } -int -e1000_up(struct e1000_adapter *adapter) +/** + * 
e1000_configure - configure the hardware for RX and TX + * @adapter: private board structure + **/ +static void e1000_configure(struct e1000_adapter *adapter) { struct net_device *netdev = adapter->netdev; int i; - /* hardware has been reset, we need to reload some things */ - e1000_set_multi(netdev); e1000_restore_vlan(adapter); @@ -548,14 +540,20 @@ e1000_up(struct e1000_adapter *adapter) } adapter->tx_queue_len = netdev->tx_queue_len; +} + +int e1000_up(struct e1000_adapter *adapter) +{ + /* hardware has been reset, we need to reload some things */ + e1000_configure(adapter); + + clear_bit(__E1000_DOWN, &adapter->flags); #ifdef CONFIG_E1000_NAPI - netif_poll_enable(netdev); + netif_poll_enable(adapter->netdev); #endif e1000_irq_enable(adapter); - clear_bit(__E1000_DOWN, &adapter->flags); - /* fire a link change interrupt to start the watchdog */ E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC); return 0; @@ -640,15 +638,15 @@ e1000_down(struct e1000_adapter *adapter) * reschedule our watchdog timer */ set_bit(__E1000_DOWN, &adapter->flags); +#ifdef CONFIG_E1000_NAPI + netif_poll_disable(netdev); +#endif e1000_irq_disable(adapter); del_timer_sync(&adapter->tx_fifo_stall_timer); del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); -#ifdef CONFIG_E1000_NAPI - netif_poll_disable(netdev); -#endif netdev->tx_queue_len = adapter->tx_queue_len; adapter->link_speed = 0; adapter->link_duplex = 0; @@ -1410,21 +1408,17 @@ e1000_open(struct net_device *netdev) return -EBUSY; /* allocate transmit descriptors */ - if ((err = e1000_setup_all_tx_resources(adapter))) + err = e1000_setup_all_tx_resources(adapter); + if (err) goto err_setup_tx; /* allocate receive descriptors */ - if ((err = e1000_setup_all_rx_resources(adapter))) - goto err_setup_rx; - - err = e1000_request_irq(adapter); + err = e1000_setup_all_rx_resources(adapter); if (err) - goto err_req_irq; + goto err_setup_rx; e1000_power_up_phy(adapter); - if ((err = e1000_up(adapter))) - goto err_up; adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) { @@ -1437,12 +1431,33 @@ e1000_open(struct net_device *netdev) e1000_check_mng_mode(&adapter->hw)) e1000_get_hw_control(adapter); + /* before we allocate an interrupt, we must be ready to handle it. + * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt + * as soon as we call pci_request_irq, so we have to set up our + * clean_rx handler before we do so.
*/ + e1000_configure(adapter); + + err = e1000_request_irq(adapter); + if (err) + goto err_req_irq; + + /* From here on the code is the same as e1000_up() */ + clear_bit(__E1000_DOWN, &adapter->flags); + +#ifdef CONFIG_E1000_NAPI + netif_poll_enable(netdev); +#endif + + e1000_irq_enable(adapter); + + /* fire a link status change interrupt to start the watchdog */ + E1000_WRITE_REG(&adapter->hw, ICS, E1000_ICS_LSC); + return E1000_SUCCESS; -err_up: - e1000_power_down_phy(adapter); - e1000_free_irq(adapter); err_req_irq: + e1000_release_hw_control(adapter); + e1000_power_down_phy(adapter); e1000_free_all_rx_resources(adapter); err_setup_rx: e1000_free_all_tx_resources(adapter); @@ -2887,33 +2902,30 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, return err; } - hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); mss = skb_shinfo(skb)->gso_size; if (skb->protocol == htons(ETH_P_IP)) { - skb->nh.iph->tot_len = 0; - skb->nh.iph->check = 0; - skb->h.th->check = - ~csum_tcpudp_magic(skb->nh.iph->saddr, - skb->nh.iph->daddr, - 0, - IPPROTO_TCP, - 0); + struct iphdr *iph = ip_hdr(skb); + iph->tot_len = 0; + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); cmd_length = E1000_TXD_CMD_IP; - ipcse = skb->h.raw - skb->data - 1; + ipcse = skb_transport_offset(skb) - 1; } else if (skb->protocol == htons(ETH_P_IPV6)) { - skb->nh.ipv6h->payload_len = 0; - skb->h.th->check = - ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - 0, - IPPROTO_TCP, - 0); + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); ipcse = 0; } - ipcss = skb->nh.raw - skb->data; - ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data; - tucss = skb->h.raw - skb->data; - tucso = (void *)&(skb->h.th->check) - (void *)skb->data; + ipcss = skb_network_offset(skb); + ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; + tucss = skb_transport_offset(skb); + tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; tucse = 0; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | @@ -2954,7 +2966,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, uint8_t css; if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { - css = skb->h.raw - skb->data; + css = skb_transport_offset(skb); i = tx_ring->next_to_use; buffer_info = &tx_ring->buffer_info[i]; @@ -2962,7 +2974,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, context_desc->lower_setup.ip_config = 0; context_desc->upper_setup.tcp_fields.tucss = css; - context_desc->upper_setup.tcp_fields.tucso = css + skb->csum; + context_desc->upper_setup.tcp_fields.tucso = + css + skb->csum_offset; context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.data = 0; context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT); @@ -3296,7 +3309,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* TSO Workaround for 82571/2/3 Controllers -- if skb->data * points to just header, pull a few bytes of payload from * frags into skb->data */ - hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { switch (adapter->hw.mac_type) { unsigned int pull_size; @@ -3307,7 +3320,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct 
net_device *netdev) * NOTE: this is a TSO only workaround * if end byte alignment not correct move us * into the next dword */ - if ((unsigned long)(skb->tail - 1) & 4) + if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4) break; /* fall through */ case e1000_82571: @@ -3363,12 +3376,9 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) (adapter->hw.mac_type == e1000_82573)) e1000_transfer_dhcp_info(adapter, skb); - local_irq_save(flags); - if (!spin_trylock(&tx_ring->tx_lock)) { + if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); return NETDEV_TX_LOCKED; - } /* need: count + 2 desc gap to keep tail from touching * head, otherwise try next time */ @@ -4227,9 +4237,12 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, netdev_alloc_skb(netdev, length + NET_IP_ALIGN); if (new_skb) { skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data - NET_IP_ALIGN, - skb->data - NET_IP_ALIGN, - length + NET_IP_ALIGN); + skb_copy_to_linear_data_offset(new_skb, + -NET_IP_ALIGN, + (skb->data - + NET_IP_ALIGN), + (length + + NET_IP_ALIGN)); /* save the skb in buffer_info as good */ buffer_info->skb = skb; skb = new_skb; @@ -4391,7 +4404,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, PCI_DMA_FROMDEVICE); vaddr = kmap_atomic(ps_page->ps_page[0], KM_SKB_DATA_SOFTIRQ); - memcpy(skb->tail, vaddr, l1); + memcpy(skb_tail_pointer(skb), vaddr, l1); kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); pci_dma_sync_single_for_device(pdev, ps_page_dma->ps_page_dma[0], diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index b4463094c93..39654e1e2be 100644 --- a/drivers/net/eepro.c +++ b/drivers/net/eepro.c @@ -1591,7 +1591,6 @@ eepro_rx(struct net_device *dev) break; } - skb->dev = dev; skb_reserve(skb,2); if (lp->version == LAN595) diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c index e28bb1e38f8..6c267c38df9 100644 --- a/drivers/net/eepro100.c +++ b/drivers/net/eepro100.c @@ -1793,7 +1793,6 @@ speedo_rx(struct net_device *dev) copying to a properly sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) { - skb->dev = dev; skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. 
*/ pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry], @@ -1805,8 +1804,9 @@ speedo_rx(struct net_device *dev) eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0); skb_put(skb, pkt_len); #else - memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->data, - pkt_len); + skb_copy_from_linear_data(sp->rx_skbuff[entry], + skb_put(skb, pkt_len), + pkt_len); #endif pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry], sizeof(struct RxFD) + pkt_len, diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 3868b803126..8aaf5ec0c36 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -976,7 +976,6 @@ static void eexp_hw_rx_pio(struct net_device *dev) lp->stats.rx_dropped++; break; } - skb->dev = dev; skb_reserve(skb, 2); outw(pbuf+10, ioaddr+READ_PTR); insw(ioaddr+DATAPORT, skb_put(skb,pkt_len),(pkt_len+1)>>1); diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index 0e4042bc0a4..58364a0ff37 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -391,8 +391,8 @@ static int ehea_poll(struct net_device *dev, int *budget) if (!skb) break; } - memcpy(skb->data, ((char*)cqe) + 64, - cqe->num_bytes_transfered - 4); + skb_copy_to_linear_data(skb, ((char*)cqe) + 64, + cqe->num_bytes_transfered - 4); ehea_fill_skb(dev, skb, cqe); } else if (rq == 2) { /* RQ2 */ skb = get_skb_by_index(skb_arr_rq2, @@ -1262,8 +1262,8 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr) static inline void write_ip_start_end(struct ehea_swqe *swqe, const struct sk_buff *skb) { - swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data)); - swqe->ip_end = (u8)(swqe->ip_start + skb->nh.iph->ihl * 4 - 1); + swqe->ip_start = skb_network_offset(skb); + swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1); } static inline void write_tcp_offset_end(struct ehea_swqe *swqe, @@ -1300,13 +1300,13 @@ static void write_swqe2_TSO(struct sk_buff *skb, /* copy only eth/ip/tcp headers to immediate data and * the rest of skb->data to sg1entry */ - headersize = ETH_HLEN + (skb->nh.iph->ihl * 4) + (skb->h.th->doff * 4); + headersize = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb); skb_data_size = skb->len - skb->data_len; if (skb_data_size >= headersize) { /* copy immediate data */ - memcpy(imm_data, skb->data, headersize); + skb_copy_from_linear_data(skb, imm_data, headersize); swqe->immediate_data_length = headersize; if (skb_data_size > headersize) { @@ -1337,7 +1337,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb, */ if (skb_data_size >= SWQE2_MAX_IMM) { /* copy immediate data */ - memcpy(imm_data, skb->data, SWQE2_MAX_IMM); + skb_copy_from_linear_data(skb, imm_data, SWQE2_MAX_IMM); swqe->immediate_data_length = SWQE2_MAX_IMM; @@ -1350,7 +1350,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb, swqe->descriptors++; } } else { - memcpy(imm_data, skb->data, skb_data_size); + skb_copy_from_linear_data(skb, imm_data, skb_data_size); swqe->immediate_data_length = skb_data_size; } } @@ -1688,6 +1688,7 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev, struct ehea_swqe *swqe, u32 lkey) { if (skb->protocol == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); /* IPv4 */ swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IP_CHECKSUM @@ -1697,15 +1698,15 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev, write_ip_start_end(swqe, skb); - if (skb->nh.iph->protocol == IPPROTO_UDP) { - if ((skb->nh.iph->frag_off & IP_MF) || - (skb->nh.iph->frag_off & IP_OFFSET)) + if 
(iph->protocol == IPPROTO_UDP) { + if ((iph->frag_off & IP_MF) || + (iph->frag_off & IP_OFFSET)) /* IP fragment, so don't change cs */ swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM; else write_udp_offset_end(swqe, skb); - } else if (skb->nh.iph->protocol == IPPROTO_TCP) { + } else if (iph->protocol == IPPROTO_TCP) { write_tcp_offset_end(swqe, skb); } @@ -1731,10 +1732,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, int i; if (skb->protocol == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); /* IPv4 */ write_ip_start_end(swqe, skb); - if (skb->nh.iph->protocol == IPPROTO_TCP) { + if (iph->protocol == IPPROTO_TCP) { swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IP_CHECKSUM | EHEA_SWQE_TCP_CHECKSUM @@ -1742,9 +1744,9 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, write_tcp_offset_end(swqe, skb); - } else if (skb->nh.iph->protocol == IPPROTO_UDP) { - if ((skb->nh.iph->frag_off & IP_MF) || - (skb->nh.iph->frag_off & IP_OFFSET)) + } else if (iph->protocol == IPPROTO_UDP) { + if ((iph->frag_off & IP_MF) || + (iph->frag_off & IP_OFFSET)) /* IP fragment, so don't change cs */ swqe->tx_control |= EHEA_SWQE_CRC | EHEA_SWQE_IMM_DATA_PRESENT; @@ -1770,10 +1772,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev, /* copy (immediate) data */ if (nfrags == 0) { /* data is in a single piece */ - memcpy(imm_data, skb->data, skb->len); + skb_copy_from_linear_data(skb, imm_data, skb->len); } else { /* first copy data from the skb->data buffer ... */ - memcpy(imm_data, skb->data, skb->len - skb->data_len); + skb_copy_from_linear_data(skb, imm_data, + skb->len - skb->data_len); imm_data += skb->len - skb->data_len; /* ... then copy data from the fragments */ diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index 3a6a83d3ee1..4e3f14c9c71 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -934,7 +934,6 @@ static void epic_init_ring(struct net_device *dev) ep->rx_skbuff[i] = skb; if (skb == NULL) break; - skb->dev = dev; /* Mark as being used by this device. */ skb_reserve(skb, 2); /* 16 byte align the IP header. */ ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev, skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); @@ -1199,7 +1198,6 @@ static int epic_rx(struct net_device *dev, int budget) to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(ep->pci_dev, ep->rx_ring[entry].bufaddr, @@ -1236,7 +1234,6 @@ static int epic_rx(struct net_device *dev, int budget) skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz); if (skb == NULL) break; - skb->dev = dev; /* Mark as being used by this device. 
*/ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev, skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index 93283e386f3..04abf59e500 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -1175,7 +1175,6 @@ static void eth16i_rx(struct net_device *dev) break; } - skb->dev = dev; skb_reserve(skb,2); /* diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index 714ea1176ec..cb0792c187b 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -993,7 +993,6 @@ static int ewrk3_rx(struct net_device *dev) if ((skb = dev_alloc_skb(pkt_len + 2)) != NULL) { unsigned char *p; - skb->dev = dev; skb_reserve(skb, 2); /* Align to 16 bytes */ p = skb_put(skb, pkt_len); diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index 38a13f44053..abe9b089c61 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -1719,7 +1719,6 @@ static int netdev_rx(struct net_device *dev) to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(np->pci_dev, np->cur_rx->buffer, diff --git a/drivers/net/fec.c b/drivers/net/fec.c index 6764281b453..255b09124e1 100644 --- a/drivers/net/fec.c +++ b/drivers/net/fec.c @@ -647,7 +647,6 @@ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) { printk("%s: Memory squeeze, dropping packet.\n", dev->name); fep->stats.rx_dropped++; } else { - skb->dev = dev; skb_put(skb,pkt_len-4); /* Make room */ eth_copy_and_sum(skb, data, pkt_len-4, 0); skb->protocol=eth_type_trans(skb,dev); diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c index 77f747a5afa..e824d5d231a 100644 --- a/drivers/net/fec_8xx/fec_main.c +++ b/drivers/net/fec_8xx/fec_main.c @@ -551,7 +551,9 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget) skbn = dev_alloc_skb(pkt_len + 2); if (skbn != NULL) { skb_reserve(skbn, 2); /* align IP header */ - memcpy(skbn->data, skb->data, pkt_len); + skb_copy_from_linear_data(skb, + skbn->data, + pkt_len); /* swap */ skbt = skb; skb = skbn; @@ -561,7 +563,6 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget) skbn = dev_alloc_skb(ENET_RX_FRSIZE); if (skbn != NULL) { - skb->dev = dev; skb_put(skb, pkt_len); /* Make room */ skb->protocol = eth_type_trans(skb, dev); received++; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index d04214e4e58..7a018027fcc 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -1385,11 +1385,12 @@ static int nv_alloc_rx(struct net_device *dev) while (np->put_rx.orig != less_rx) { struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { - skb->dev = dev; np->put_rx_ctx->skb = skb; -
np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data, - skb->end-skb->data, PCI_DMA_FROMDEVICE); - np->put_rx_ctx->dma_len = skb->end-skb->data; + np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + skb->data, + skb_tailroom(skb), + PCI_DMA_FROMDEVICE); + np->put_rx_ctx->dma_len = skb_tailroom(skb); np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32; np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF; wmb(); @@ -1604,8 +1606,9 @@ static void nv_drain_rx(struct net_device *dev) wmb(); if (np->rx_skb[i].skb) { pci_unmap_single(np->pci_dev, np->rx_skb[i].dma, - np->rx_skb[i].skb->end-np->rx_skb[i].skb->data, - PCI_DMA_FROMDEVICE); + (skb_end_pointer(np->rx_skb[i].skb) - + np->rx_skb[i].skb->data), + PCI_DMA_FROMDEVICE); dev_kfree_skb(np->rx_skb[i].skb); np->rx_skb[i].skb = NULL; } @@ -4376,11 +4379,12 @@ static int nv_loopback_test(struct net_device *dev) ret = 0; goto out; } + test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data, + skb_tailroom(tx_skb), + PCI_DMA_FROMDEVICE); pkt_data = skb_put(tx_skb, pkt_len); for (i = 0; i < pkt_len; i++) pkt_data[i] = (u8)(i & 0xff); - test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data, - tx_skb->end-tx_skb->data, PCI_DMA_FROMDEVICE); if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr); @@ -4437,7 +4441,7 @@ static int nv_loopback_test(struct net_device *dev) } pci_unmap_page(np->pci_dev, test_dma_addr, - tx_skb->end-tx_skb->data, + (skb_end_pointer(tx_skb) - tx_skb->data), PCI_DMA_TODEVICE); dev_kfree_skb_any(tx_skb); out: diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 4a05c14bf7e..e2ddd617493 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -160,7 +160,8 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget) skbn = dev_alloc_skb(pkt_len + 2); if (skbn != NULL) { skb_reserve(skbn, 2); /* align IP header */ - memcpy(skbn->data, skb->data, pkt_len); + skb_copy_from_linear_data(skb, + skbn->data, pkt_len); /* swap */ skbt = skb; skb = skbn; @@ -170,7 +171,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget) skbn = dev_alloc_skb(ENET_RX_FRSIZE); if (skbn != NULL) { - skb->dev = dev; skb_put(skb, pkt_len); /* Make room */ skb->protocol = eth_type_trans(skb, dev); received++; @@ -294,7 +294,8 @@ static int fs_enet_rx_non_napi(struct net_device *dev) skbn = dev_alloc_skb(pkt_len + 2); if (skbn != NULL) { skb_reserve(skbn, 2); /* align IP header */ - memcpy(skbn->data, skb->data, pkt_len); + skb_copy_from_linear_data(skb, + skbn->data, pkt_len); /* swap */ skbt = skb; skb = skbn; @@ -304,7 +305,6 @@ static int fs_enet_rx_non_napi(struct net_device *dev) skbn = dev_alloc_skb(ENET_RX_FRSIZE); if (skbn != NULL) { - skb->dev = dev; skb_put(skb, pkt_len); /* Make room */ skb->protocol = eth_type_trans(skb, dev); received++; @@ -516,7 +516,6 @@ void fs_init_bds(struct net_device *dev) break; } fep->rx_skbuff[i] = skb; - skb->dev = dev; CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skb->data, L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index d981d4c41dd..b666a0cc064 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -942,18 +942,18 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb) /* Tell the controller what the protocol is */ /* And provide the already calculated phcs */ - if (skb->nh.iph->protocol == IPPROTO_UDP) { + if (ip_hdr(skb)->protocol == 
IPPROTO_UDP) { flags |= TXFCB_UDP; - fcb->phcs = skb->h.uh->check; + fcb->phcs = udp_hdr(skb)->check; } else - fcb->phcs = skb->h.th->check; + fcb->phcs = tcp_hdr(skb)->check; /* l3os is the distance between the start of the * frame (skb->data) and the start of the IP hdr. * l4os is the distance between the start of the * l3 hdr and the l4 hdr */ - fcb->l3os = (u16)(skb->nh.raw - skb->data - GMAC_FCB_LEN); - fcb->l4os = (u16)(skb->h.raw - skb->nh.raw); + fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN); + fcb->l4os = skb_network_header_len(skb); fcb->flags = flags; } @@ -1295,8 +1295,6 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp) */ skb_reserve(skb, alignamount); - skb->dev = dev; - bdp->bufPtr = dma_map_single(NULL, skb->data, priv->rx_buffer_size, DMA_FROM_DEVICE); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index c3c0d67fc38..2521b111b3a 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1568,7 +1568,6 @@ static int hamachi_rx(struct net_device *dev) printk(KERN_ERR "%s: rx_copybreak non-zero " "not good with RX_CHECKSUM\n", dev->name); #endif - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(hmp->pci_dev, hmp->rx_ring[entry].addr, diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d2542697e29..656f2789c9b 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -282,7 +282,7 @@ static int bpq_xmit(struct sk_buff *skb, struct net_device *dev) } skb->protocol = ax25_type_trans(skb, dev); - skb->nh.raw = skb->data; + skb_reset_network_header(skb); dev->hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0); bpq->stats.tx_packets++; bpq->stats.tx_bytes+=skb->len; diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 0fbb414b5a4..3be8c504759 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -930,7 +930,7 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev) /* Transfer data to DMA buffer */ i = priv->tx_head; - memcpy(priv->tx_buf[i], skb->data + 1, skb->len - 1); + skb_copy_from_linear_data_offset(skb, 1, priv->tx_buf[i], skb->len - 1); priv->tx_len[i] = skb->len - 1; /* Clear interrupts while we touch our circular buffers */ diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c index f5a17ad9d3d..b33adc6a340 100644 --- a/drivers/net/hamradio/hdlcdrv.c +++ b/drivers/net/hamradio/hdlcdrv.c @@ -317,7 +317,9 @@ void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s) dev_kfree_skb_irq(skb); break; } - memcpy(s->hdlctx.buffer, skb->data+1, pkt_len); + skb_copy_from_linear_data_offset(skb, 1, + s->hdlctx.buffer, + pkt_len); dev_kfree_skb_irq(skb); s->hdlctx.bp = s->hdlctx.buffer; append_crc_ccitt(s->hdlctx.buffer, pkt_len); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index ee3ea4fa729..467559debfd 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -638,7 +638,9 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp) dev_kfree_skb_any(skb); break; } - memcpy(yp->tx_buf, skb->data + 1, yp->tx_len); + skb_copy_from_linear_data_offset(skb, 1, + yp->tx_buf, + yp->tx_len); dev_kfree_skb_any(skb); yp->tx_count = 0; yp->tx_crcl = 0x21; diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index 7dc5185aa2c..8118a6750b6 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -1816,7 +1816,6 @@ static void hp100_rx(struct
net_device *dev) u_char *ptr; skb_reserve(skb,2); - skb->dev = dev; /* ptr to start of the sk_buff data area */ skb_put(skb, pkt_len); diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c index dd8ad874682..3d82d46f499 100644 --- a/drivers/net/ibm_emac/ibm_emac_core.c +++ b/drivers/net/ibm_emac/ibm_emac_core.c @@ -1338,7 +1338,7 @@ static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot) dev_kfree_skb(dev->rx_sg_skb); dev->rx_sg_skb = NULL; } else { - cacheable_memcpy(dev->rx_sg_skb->tail, + cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), dev->rx_skb[slot]->data, len); skb_put(dev->rx_sg_skb, len); emac_recycle_rx_skb(dev, slot, len); @@ -1398,7 +1398,6 @@ static int emac_poll_rx(void *param, int budget) skb_put(skb, len); push_packet: - skb->dev = dev->ndev; skb->protocol = eth_type_trans(skb, dev->ndev); emac_rx_csum(dev, skb, ctrl); diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c index 3f946c81151..fe85d6fcba3 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -601,7 +601,6 @@ static void irqrx_handler(struct net_device *dev) /* set up skb fields */ - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); skb->ip_summed = CHECKSUM_NONE; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 458db0538a9..0573fcfcb2c 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -798,7 +798,6 @@ static int ibmveth_poll(struct net_device *netdev, int *budget) skb_reserve(skb, offset); skb_put(skb, length); - skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); netif_receive_skb(skb); /* send it up */ diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index 4ad780719a8..f749e07c642 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -633,8 +633,6 @@ static inline void ioc3_rx(struct ioc3_private *ip) ip->rx_skbs[rx_entry] = NULL; /* Poison */ - new_skb->dev = priv_netdev(ip); - /* Because we reserve afterwards. */ skb_put(new_skb, (1664 + RX_OFFSET)); rxb = (struct ioc3_erxbuf *) new_skb->data; @@ -940,7 +938,6 @@ static void ioc3_alloc_rings(struct net_device *dev) } ip->rx_skbs[i] = skb; - skb->dev = dev; /* Because we reserve afterwards. */ skb_put(skb, (1664 + RX_OFFSET)); @@ -1396,9 +1393,9 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) * manually. */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - int proto = ntohs(skb->nh.iph->protocol); + const struct iphdr *ih = ip_hdr(skb); + const int proto = ntohs(ih->protocol); unsigned int csoff; - struct iphdr *ih = skb->nh.iph; uint32_t csum, ehsum; uint16_t *eh; @@ -1425,11 +1422,11 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) csoff = ETH_HLEN + (ih->ihl << 2); if (proto == IPPROTO_UDP) { csoff += offsetof(struct udphdr, check); - skb->h.uh->check = csum; + udp_hdr(skb)->check = csum; } if (proto == IPPROTO_TCP) { csoff += offsetof(struct tcphdr, check); - skb->h.th->check = csum; + tcp_hdr(skb)->check = csum; } w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT); @@ -1446,7 +1443,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (len <= 104) { /* Short packet, let's copy it directly into the ring. */ - memcpy(desc->data, skb->data, skb->len); + skb_copy_from_linear_data(skb, desc->data, skb->len); if (len < ETH_ZLEN) { /* Very short packet, pad with zeros at the end. 
*/ memset(desc->data + len, 0, ETH_ZLEN - len); diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c index cebf8c374bc..f9c889c0dd0 100644 --- a/drivers/net/irda/ali-ircc.c +++ b/drivers/net/irda/ali-ircc.c @@ -1472,9 +1472,8 @@ static int ali_ircc_fir_hard_xmit(struct sk_buff *skb, struct net_device *dev) self->stats.tx_bytes += skb->len; - memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, - skb->len); - + skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start, + skb->len); self->tx_fifo.len++; self->tx_fifo.free++; @@ -1924,7 +1923,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self) /* Copy frame without CRC, CRC is removed by hardware*/ skb_put(skb, len); - memcpy(skb->data, self->rx_buff.data, len); + skb_copy_to_linear_data(skb, self->rx_buff.data, len); /* Move to next frame */ self->rx_buff.data += len; @@ -1932,7 +1931,7 @@ static int ali_ircc_dma_receive_complete(struct ali_ircc_cb *self) self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); self->netdev->last_rx = jiffies; diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c index 37914dc5b90..4dbdfaaf37b 100644 --- a/drivers/net/irda/au1k_ir.c +++ b/drivers/net/irda/au1k_ir.c @@ -526,7 +526,7 @@ static int au1k_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev) if (aup->speed == 4000000) { /* FIR */ - memcpy((void *)pDB->vaddr, skb->data, skb->len); + skb_copy_from_linear_data(skb, pDB->vaddr, skb->len); ptxd->count_0 = skb->len & 0xff; ptxd->count_1 = (skb->len >> 8) & 0xff; @@ -604,9 +604,9 @@ static int au1k_irda_rx(struct net_device *dev) skb_put(skb, count); else skb_put(skb, count-2); - memcpy(skb->data, (void *)pDB->vaddr, count-2); + skb_copy_to_linear_data(skb, pDB->vaddr, count - 2); skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); prxd->count_0 = 0; diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c index 11af0ae7510..3ca47bf6dfe 100644 --- a/drivers/net/irda/donauboe.c +++ b/drivers/net/irda/donauboe.c @@ -1119,7 +1119,7 @@ dumpbufs(skb->data,skb->len,'>'); else { len = skb->len; - memcpy (self->tx_bufs[self->txs], skb->data, len); + skb_copy_from_linear_data(skb, self->tx_bufs[self->txs], len); } self->ring->tx[self->txs].len = len & 0x0fff; @@ -1282,11 +1282,11 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<'); skb_reserve (skb, 1); skb_put (skb, len); - memcpy (skb->data, self->rx_bufs[self->rxs], len); - + skb_copy_to_linear_data(skb, self->rx_bufs[self->rxs], + len); self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons (ETH_P_IRDA); } else diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 1d510bdc9b8..0ac240ca905 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -441,7 +441,7 @@ static int irda_usb_hard_xmit(struct sk_buff *skb, struct net_device *netdev) goto drop; } - memcpy(self->tx_buff + self->header_length, skb->data, skb->len); + skb_copy_from_linear_data(skb, self->tx_buff + self->header_length, skb->len); /* Change setting for next frame */ if (self->capability & IUC_STIR421X) { @@ -902,7 +902,7 @@ static void irda_usb_receive(struct urb *urb) if(docopy) { /* Copy packet, so we can recycle the original */ - memcpy(newskb->data, skb->data, urb->actual_length); + 
skb_copy_from_linear_data(skb, newskb->data, urb->actual_length); /* Deliver this new skb */ dataskb = newskb; /* And hook the old skb to the URB @@ -921,7 +921,7 @@ static void irda_usb_receive(struct urb *urb) /* Ask the networking layer to queue the packet for the IrDA stack */ dataskb->dev = self->netdev; - dataskb->mac.raw = dataskb->data; + skb_reset_mac_header(dataskb); dataskb->protocol = htons(ETH_P_IRDA); len = dataskb->len; netif_rx(dataskb); diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index f0c61f3b2a8..0de867288a4 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct mcs_cb *mcs) /* Setup a communication between mcs7780 and agilent chip. */ static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs) { - IRDA_WARNING("This transceiver type is not supported yet."); + IRDA_WARNING("This transceiver type is not supported yet.\n"); return 1; } /* Setup a communication between mcs7780 and sharp chip. */ static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs) { - IRDA_WARNING("This transceiver type is not supported yet."); + IRDA_WARNING("This transceiver type is not supported yet.\n"); return 1; } @@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs) break; default: - IRDA_WARNING("Unknown transceiver type: %d", + IRDA_WARNING("Unknown transceiver type: %d\n", mcs->transceiver_type); ret = 1; } @@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs) return ret; error: - IRDA_ERROR("%s", msg); + IRDA_ERROR("%s\n", msg); return ret; } @@ -353,7 +353,7 @@ static unsigned mcs_wrap_fir_skb(const struct sk_buff *skb, __u8 *buf) buf[0] = len & 0xff; buf[1] = (len >> 8) & 0xff; /* copy the data into the tx buffer. */ - memcpy(buf+2, skb->data, skb->len); + skb_copy_from_linear_data(skb, buf + 2, skb->len); /* put the fcs in the last four bytes in little endian order. */ buf[len - 4] = fcs & 0xff; buf[len - 3] = (fcs >> 8) & 0xff; @@ -377,7 +377,7 @@ static unsigned mcs_wrap_mir_skb(const struct sk_buff *skb, __u8 *buf) buf[0] = len & 0xff; buf[1] = (len >> 8) & 0xff; /* copy the data */ - memcpy(buf+2, skb->data, skb->len); + skb_copy_from_linear_data(skb, buf + 2, skb->len); /* put the fcs in last two bytes in little endian order. 
*/ buf[len - 2] = fcs & 0xff; buf[len - 1] = (fcs >> 8) & 0xff; @@ -426,9 +426,9 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len) } skb_reserve(skb, 1); - memcpy(skb->data, buf, new_len); + skb_copy_to_linear_data(skb, buf, new_len); skb_put(skb, new_len); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); skb->dev = mcs->netdev; @@ -479,9 +479,9 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len) } skb_reserve(skb, 1); - memcpy(skb->data, buf, new_len); + skb_copy_to_linear_data(skb, buf, new_len); skb_put(skb, new_len); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); skb->dev = mcs->netdev; @@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs) } while(cnt++ < 100 && (rval & MCS_IRINTX)); if(cnt >= 100) { - IRDA_ERROR("unable to change speed"); + IRDA_ERROR("unable to change speed\n"); ret = -EIO; goto error; } @@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs) default: ret = 1; - IRDA_WARNING("Unknown transceiver type: %d", + IRDA_WARNING("Unknown transceiver type: %d\n", mcs->transceiver_type); } if (unlikely(ret)) @@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev) sprintf(hwname, "usb#%d", mcs->usbdev->devnum); mcs->irlap = irlap_open(netdev, &mcs->qos, hwname); if (!mcs->irlap) { - IRDA_ERROR("mcs7780: irlap_open failed"); + IRDA_ERROR("mcs7780: irlap_open failed\n"); goto error2; } @@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct net_device *ndev) mcs->out_buf, wraplen, mcs_send_irq, mcs); if ((ret = usb_submit_urb(mcs->tx_urb, GFP_ATOMIC))) { - IRDA_ERROR("failed tx_urb: %d", ret); + IRDA_ERROR("failed tx_urb: %d\n", ret); switch (ret) { case -ENODEV: case -EPIPE: @@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf, if (!ndev) goto error1; - IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.", udev->devnum); + IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.\n", udev->devnum); /* what is it realy for? */ SET_MODULE_OWNER(ndev); @@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf, ret = usb_reset_configuration(udev); if (ret != 0) { - IRDA_ERROR("mcs7780: usb reset configuration failed"); + IRDA_ERROR("mcs7780: usb reset configuration failed\n"); goto error2; } @@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf, if (ret != 0) goto error2; - IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s", + IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s\n", ndev->name); mcs->transceiver_type = transceiver_type; @@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf) free_netdev(mcs->netdev); usb_set_intfdata(intf, NULL); - IRDA_DEBUG(0, "MCS7780 now disconnected."); + IRDA_DEBUG(0, "MCS7780 now disconnected.\n"); } /* Module insertion */ @@ -992,7 +992,7 @@ static int __init mcs_init(void) /* register this driver with the USB subsystem */ result = usb_register(&mcs_driver); if (result) - IRDA_ERROR("usb_register failed. Error number %d", result); + IRDA_ERROR("usb_register failed. 
Error number %d\n", result); return result; } diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c index 29b5ccd29d0..d96c89751a7 100644 --- a/drivers/net/irda/nsc-ircc.c +++ b/drivers/net/irda/nsc-ircc.c @@ -1466,9 +1466,8 @@ static int nsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev) self->stats.tx_bytes += skb->len; - memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, - skb->len); - + skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start, + skb->len); self->tx_fifo.len++; self->tx_fifo.free++; @@ -1869,10 +1868,14 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase) /* Copy frame without CRC */ if (self->io.speed < 4000000) { skb_put(skb, len-2); - memcpy(skb->data, self->rx_buff.data, len-2); + skb_copy_to_linear_data(skb, + self->rx_buff.data, + len - 2); } else { skb_put(skb, len-4); - memcpy(skb->data, self->rx_buff.data, len-4); + skb_copy_to_linear_data(skb, + self->rx_buff.data, + len - 4); } /* Move to next frame */ @@ -1881,7 +1884,7 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase) self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); self->netdev->last_rx = jiffies; diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 2272156af31..fb196fd9185 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -386,12 +386,12 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in /* Align IP header to 20 bytes */ skb_reserve(skb, 1); - memcpy(skb->data, si->dma_rx_buff, len); + skb_copy_to_linear_data(skb, si->dma_rx_buff, len); skb_put(skb, len); /* Feed it to IrLAP */ skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); @@ -484,7 +484,7 @@ static int pxa_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long mtt = irda_get_mtt(skb); si->dma_tx_buff_len = skb->len; - memcpy(si->dma_tx_buff, skb->data, skb->len); + skb_copy_from_linear_data(skb, si->dma_tx_buff, skb->len); if (mtt) while ((unsigned)(OSCR - si->last_oscr)/4 < mtt) diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c index 937372d0039..056639f72be 100644 --- a/drivers/net/irda/sa1100_ir.c +++ b/drivers/net/irda/sa1100_ir.c @@ -504,7 +504,7 @@ static void sa1100_irda_fir_error(struct sa1100_irda *si, struct net_device *dev skb_put(skb, len); skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); si->stats.rx_packets++; si->stats.rx_bytes += len; diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c index 31c623381ea..198bf3bfa70 100644 --- a/drivers/net/irda/smsc-ircc2.c +++ b/drivers/net/irda/smsc-ircc2.c @@ -315,6 +315,7 @@ static struct smsc_chip __initdata lpc_chips_flat[] = { /* Base address 0x2E or 0x4E */ { "47N227", KEY55_1|FIR|SERx4, 0x5a, 0x00 }, + { "47N227", KEY55_1|FIR|SERx4, 0x7a, 0x00 }, { "47N267", KEY55_1|FIR|SERx4, 0x5e, 0x00 }, { NULL } }; @@ -1161,7 +1162,7 @@ static int smsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev) self->new_speed = speed; } - memcpy(self->tx_buff.head, skb->data, skb->len); + skb_copy_from_linear_data(skb, self->tx_buff.head, skb->len); self->tx_buff.len = skb->len; self->tx_buff.data = self->tx_buff.head; @@ -1412,7 +1413,7 @@ static void 
smsc_ircc_dma_receive_complete(struct smsc_ircc_cb *self) self->stats.rx_bytes += len; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); } diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 20d306fea4c..755aa444a4d 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -52,7 +52,6 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <net/irda/irda.h> -#include <net/irda/irlap.h> #include <net/irda/irda_device.h> #include <net/irda/wrapper.h> #include <net/irda/crc.h> @@ -349,7 +348,7 @@ static void fir_eof(struct stir_cb *stir) } skb_reserve(nskb, 1); skb = nskb; - memcpy(nskb->data, rx_buff->data, len); + skb_copy_to_linear_data(nskb, rx_buff->data, len); } else { nskb = dev_alloc_skb(rx_buff->truesize); if (unlikely(!nskb)) { @@ -364,7 +363,7 @@ static void fir_eof(struct stir_cb *stir) skb_put(skb, len); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); skb->dev = stir->netdev; diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c index c3ed9b3067e..ff5358574d0 100644 --- a/drivers/net/irda/via-ircc.c +++ b/drivers/net/irda/via-ircc.c @@ -925,8 +925,8 @@ static int via_ircc_hard_xmit_fir(struct sk_buff *skb, self->tx_fifo.tail += skb->len; self->stats.tx_bytes += skb->len; - memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, - skb->len); + skb_copy_from_linear_data(skb, + self->tx_fifo.queue[self->tx_fifo.free].start, skb->len); self->tx_fifo.len++; self->tx_fifo.free++; //F01 if (self->tx_fifo.len == 1) { @@ -1125,7 +1125,7 @@ static int via_ircc_dma_receive_complete(struct via_ircc_cb *self, self->stats.rx_bytes += len; self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); return TRUE; @@ -1189,7 +1189,7 @@ F01_E */ skb_reserve(skb, 1); skb_put(skb, len - 4); - memcpy(skb->data, self->rx_buff.data, len - 4); + skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4); IRDA_DEBUG(2, "%s(): len=%x.rx_buff=%p\n", __FUNCTION__, len - 4, self->rx_buff.data); @@ -1198,7 +1198,7 @@ F01_E */ self->stats.rx_bytes += len; self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); @@ -1234,7 +1234,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase) } skb_reserve(skb, 1); skb_put(skb, len - 4 + 1); - memcpy(skb->data, self->rx_buff.data, len - 4 + 1); + skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4 + 1); st_fifo->tail++; st_fifo->len++; if (st_fifo->tail > MAX_RX_WINDOW) @@ -1244,7 +1244,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase) self->stats.rx_bytes += len; self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); if (st_fifo->len < (MAX_RX_WINDOW + 2)) { @@ -1303,7 +1303,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase) } skb_reserve(skb, 1); skb_put(skb, len - 4); - memcpy(skb->data, self->rx_buff.data, len - 4); + skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4); IRDA_DEBUG(2, "%s(): len=%x.head=%x\n", __FUNCTION__, len - 4, st_fifo->head); @@ -1313,7 +1313,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase) self->stats.rx_bytes += len; self->stats.rx_packets++; skb->dev = self->netdev; 
- skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); } //while diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index 3457e9d8b66..c4be973867a 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -595,7 +595,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd) rd->skb = NULL; skb->dev = ndev; memcpy(skb_put(skb,len), rd->buf, len); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); if (in_interrupt()) netif_rx(skb); else @@ -993,7 +993,7 @@ static int vlsi_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) goto drop; } else - memcpy(rd->buf, skb->data, len); + skb_copy_from_linear_data(skb, rd->buf, len); } rd->skb = skb; /* remember skb for tx-complete stats */ diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c index 4212657fa4f..5182e800cc1 100644 --- a/drivers/net/irda/w83977af_ir.c +++ b/drivers/net/irda/w83977af_ir.c @@ -529,7 +529,7 @@ int w83977af_hard_xmit(struct sk_buff *skb, struct net_device *dev) /* Decide if we should use PIO or DMA transfer */ if (self->io.speed > PIO_MAX_SPEED) { self->tx_buff.data = self->tx_buff.head; - memcpy(self->tx_buff.data, skb->data, skb->len); + skb_copy_from_linear_data(skb, self->tx_buff.data, skb->len); self->tx_buff.len = skb->len; mtt = irda_get_mtt(skb); @@ -908,10 +908,14 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self) /* Copy frame without CRC */ if (self->io.speed < 4000000) { skb_put(skb, len-2); - memcpy(skb->data, self->rx_buff.data, len-2); + skb_copy_to_linear_data(skb, + self->rx_buff.data, + len - 2); } else { skb_put(skb, len-4); - memcpy(skb->data, self->rx_buff.data, len-4); + skb_copy_to_linear_data(skb, + self->rx_buff.data, + len - 4); } /* Move to next frame */ @@ -919,7 +923,7 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self) self->stats.rx_packets++; skb->dev = self->netdev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = htons(ETH_P_IRDA); netif_rx(skb); self->netdev->last_rx = jiffies; diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 0e9ba3c3faf..347d50cd77d 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1540,7 +1540,6 @@ static void veth_receive(struct veth_lpar_connection *cnx, } skb_put(skb, length); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); skb->ip_summed = CHECKSUM_NONE; netif_rx(skb); /* send it up */ diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index afc2ec72529..dfde80e54ae 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1182,24 +1182,27 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb) if (likely(skb_is_gso(skb))) { struct ixgb_buffer *buffer_info; + struct iphdr *iph; + if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (err) return err; } - hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); mss = skb_shinfo(skb)->gso_size; - skb->nh.iph->tot_len = 0; - skb->nh.iph->check = 0; - skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, - skb->nh.iph->daddr, - 0, IPPROTO_TCP, 0); - ipcss = skb->nh.raw - skb->data; - ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data; - ipcse = skb->h.raw - skb->data - 1; - tucss = skb->h.raw - skb->data; - tucso = (void *)&(skb->h.th->check) - (void *)skb->data; + iph = ip_hdr(skb); + iph->tot_len = 0; + iph->check = 0; 
+ tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); + ipcss = skb_network_offset(skb); + ipcso = (void *)&(iph->check) - (void *)skb->data; + ipcse = skb_transport_offset(skb) - 1; + tucss = skb_transport_offset(skb); + tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; tucse = 0; i = adapter->tx_ring.next_to_use; @@ -1243,7 +1246,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb) if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { struct ixgb_buffer *buffer_info; - css = skb->h.raw - skb->data; + css = skb_transport_offset(skb); cso = css + skb->csum_offset; i = adapter->tx_ring.next_to_use; @@ -2014,9 +2017,12 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter) netdev_alloc_skb(netdev, length + NET_IP_ALIGN); if (new_skb) { skb_reserve(new_skb, NET_IP_ALIGN); - memcpy(new_skb->data - NET_IP_ALIGN, - skb->data - NET_IP_ALIGN, - length + NET_IP_ALIGN); + skb_copy_to_linear_data_offset(new_skb, + -NET_IP_ALIGN, + (skb->data - + NET_IP_ALIGN), + (length + + NET_IP_ALIGN)); /* save the skb in buffer_info as good */ buffer_info->skb = skb; skb = new_skb; diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c index a4eccb11d67..6683afc02aa 100644 --- a/drivers/net/ixp2000/ixpdev.c +++ b/drivers/net/ixp2000/ixpdev.c @@ -110,11 +110,10 @@ static int ixpdev_rx(struct net_device *dev, int *budget) skb = dev_alloc_skb(desc->pkt_length + 2); if (likely(skb != NULL)) { - skb->dev = nds[desc->channel]; skb_reserve(skb, 2); eth_copy_and_sum(skb, buf, desc->pkt_length, 0); skb_put(skb, desc->pkt_length); - skb->protocol = eth_type_trans(skb, skb->dev); + skb->protocol = eth_type_trans(skb, nds[desc->channel]); skb->dev->last_rx = jiffies; diff --git a/drivers/net/lance.c b/drivers/net/lance.c index a3843320dbe..0fe96c85828 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -988,7 +988,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) if (lance_debug > 5) printk("%s: bouncing a high-memory packet (%#x).\n", dev->name, (u32)isa_virt_to_bus(skb->data)); - memcpy(&lp->tx_bounce_buffs[entry], skb->data, skb->len); + skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len); lp->tx_ring[entry].base = ((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000; dev_kfree_skb(skb); @@ -1184,7 +1184,6 @@ lance_rx(struct net_device *dev) } break; } - skb->dev = dev; skb_reserve(skb,2); /* 16 byte align */ skb_put(skb,pkt_len); /* Make room */ eth_copy_and_sum(skb, diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index 452863d5d49..0edcd125fd6 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -801,7 +801,6 @@ memory_squeeze: lp->stats.rx_dropped++; } else { - skb->dev = dev; if (!rx_in_place) { /* 16 byte align the data fields */ dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE); diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c index e726c06b8dc..5c86e737f95 100644 --- a/drivers/net/lib8390.c +++ b/drivers/net/lib8390.c @@ -722,7 +722,6 @@ static void ei_receive(struct net_device *dev) else { skb_reserve(skb,2); /* IP headers on 16 byte boundaries */ - skb->dev = dev; skb_put(skb, pkt_len); /* Make room */ ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame)); skb->protocol=eth_type_trans(skb,dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 2b739fd584f..6ba6ed2b480 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ 
-75,8 +75,9 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats); #ifdef LOOPBACK_TSO static void emulate_large_send_offload(struct sk_buff *skb) { - struct iphdr *iph = skb->nh.iph; - struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4)); + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) + + (iph->ihl * 4)); unsigned int doffset = (iph->ihl + th->doff) * 4; unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; unsigned int offset = 0; @@ -90,10 +91,11 @@ static void emulate_large_send_offload(struct sk_buff *skb) if (!nskb) break; skb_reserve(nskb, 32); - nskb->mac.raw = nskb->data - 14; - nskb->nh.raw = nskb->data; - iph = nskb->nh.iph; - memcpy(nskb->data, skb->nh.raw, doffset); + skb_set_mac_header(nskb, -ETH_HLEN); + skb_reset_network_header(nskb); + iph = ip_hdr(nskb); + skb_copy_to_linear_data(nskb, skb_network_header(skb), + doffset); if (skb_copy_bits(skb, doffset + offset, nskb->data + doffset, @@ -108,7 +110,7 @@ static void emulate_large_send_offload(struct sk_buff *skb) memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); nskb->pkt_type = skb->pkt_type; - th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4); + th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4); iph->tot_len = htons(frag_size + doffset); iph->id = htons(id); iph->check = 0; @@ -137,7 +139,6 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); skb->protocol = eth_type_trans(skb,dev); - skb->dev = dev; #ifndef LOOPBACK_MUST_CHECKSUM skb->ip_summed = CHECKSUM_UNNECESSARY; #endif @@ -145,7 +146,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) #ifdef LOOPBACK_TSO if (skb_is_gso(skb)) { BUG_ON(skb->protocol != htons(ETH_P_IP)); - BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); + BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); emulate_large_send_offload(skb); return 0; @@ -163,11 +164,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static struct net_device_stats loopback_stats; - static struct net_device_stats *get_stats(struct net_device *dev) { - struct net_device_stats *stats = &loopback_stats; + struct net_device_stats *stats = &dev->stats; unsigned long bytes = 0; unsigned long packets = 0; int i; @@ -207,7 +206,6 @@ static const struct ethtool_ops loopback_ethtool_ops = { struct net_device loopback_dev = { .name = "lo", .get_stats = &get_stats, - .priv = &loopback_stats, .mtu = (16 * 1024) + 20 + 20 + 12, .hard_start_xmit = loopback_xmit, .hard_header = eth_header, diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c index 177c502f738..5fc18da1873 100644 --- a/drivers/net/lp486e.c +++ b/drivers/net/lp486e.c @@ -676,7 +676,6 @@ i596_rx_one(struct net_device *dev, struct i596_private *lp, return 1; } - skb->dev = dev; memcpy(skb_put(skb,pkt_len), rfd->data, pkt_len); skb->protocol = eth_type_trans(skb,dev); diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c index e960138011c..90e695d5326 100644 --- a/drivers/net/mac89x0.c +++ b/drivers/net/mac89x0.c @@ -530,7 +530,6 @@ net_rx(struct net_device *dev) return; } skb_put(skb, length); - skb->dev = dev; memcpy_fromio(skb->data, dev->mem_start + PP_RxFrame, length); diff --git a/drivers/net/macb.c b/drivers/net/macb.c index 2e9571bf073..0e04f7ac3f2 100644 --- a/drivers/net/macb.c +++ b/drivers/net/macb.c @@ -357,7 +357,6 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, } skb_reserve(skb, RX_OFFSET); - skb->dev = bp->dev; skb->ip_summed = CHECKSUM_NONE; skb_put(skb, len); 
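Every conversion in this patch funnels through a small set of sk_buff accessors instead of open-coded memcpy() and header-pointer arithmetic. As a rough sketch of what the most common helpers reduce to — paraphrased from the 2.6.2x-era include/linux/skbuff.h, so treat the bodies as illustrative rather than authoritative:

	/* Copy out of / into the linear (non-paged) part of an skb. */
	static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
						     void *to, const unsigned int len)
	{
		memcpy(to, skb->data, len);
	}

	static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
							    const int offset,
							    void *to, const unsigned int len)
	{
		memcpy(to, skb->data + offset, len);
	}

	static inline void skb_copy_to_linear_data(struct sk_buff *skb,
						   const void *from,
						   const unsigned int len)
	{
		memcpy(skb->data, from, len);
	}

	/* Offset helpers replacing skb->nh.raw - skb->data style arithmetic. */
	static inline int skb_network_offset(const struct sk_buff *skb)
	{
		return skb_network_header(skb) - skb->data;
	}

	static inline int skb_transport_offset(const struct sk_buff *skb)
	{
		return skb_transport_header(skb) - skb->data;
	}

The indirection is deliberate: once drivers stop touching skb->data, skb->nh.raw and friends directly, the sk_buff header members can later be switched from raw pointers to offsets (a size win on 64-bit) without another tree-wide sweep.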
@@ -368,9 +367,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, BUG_ON(frag != last_frag); frag_len = len - offset; } - memcpy(skb->data + offset, - bp->rx_buffers + (RX_BUFFER_SIZE * frag), - frag_len); + skb_copy_to_linear_data_offset(skb, offset, + (bp->rx_buffers + + (RX_BUFFER_SIZE * frag)), + frag_len); offset += RX_BUFFER_SIZE; bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED); wmb(); @@ -576,7 +576,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) int i; dev_dbg(&bp->pdev->dev, "start_xmit: len %u head %p data %p tail %p end %p\n", - skb->len, skb->head, skb->data, skb->tail, skb->end); + skb->len, skb->head, skb->data, + skb_tail_pointer(skb), skb_end_pointer(skb)); dev_dbg(&bp->pdev->dev, "data:"); for (i = 0; i < 16; i++) diff --git a/drivers/net/mace.c b/drivers/net/mace.c index 9ec24f0d5d6..b3bd6239495 100644 --- a/drivers/net/mace.c +++ b/drivers/net/mace.c @@ -939,7 +939,6 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id) else /* Ethernet header; mace includes FCS */ nb -= 8; skb_put(skb, nb); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); mp->stats.rx_bytes += skb->len; netif_rx(skb); diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c index 5d541e87304..27911c07558 100644 --- a/drivers/net/macmace.c +++ b/drivers/net/macmace.c @@ -420,8 +420,7 @@ static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev) mp->stats.tx_bytes += skb->len; /* We need to copy into our xmit buffer to take care of alignment and caching issues */ - - memcpy((void *) mp->tx_ring, skb->data, skb->len); + skb_copy_from_linear_data(skb, mp->tx_ring, skb->len); /* load the Tx DMA and fire it off */ @@ -621,7 +620,6 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf) skb_reserve(skb,2); memcpy(skb_put(skb, mf->len), mf->data, mf->len); - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff --git a/drivers/net/meth.c b/drivers/net/meth.c index 7e69ca6edd9..0343ea12b29 100644 --- a/drivers/net/meth.c +++ b/drivers/net/meth.c @@ -421,7 +421,6 @@ static void meth_rx(struct net_device* dev, unsigned long int_status) /* Write metadata, and then pass to the receive level */ skb_put(skb_c, len); priv->rx_skbs[priv->rx_write] = skb; - skb_c->dev = dev; skb_c->protocol = eth_type_trans(skb_c, dev); dev->last_rx = jiffies; priv->stats.rx_packets++; @@ -609,7 +608,7 @@ static void meth_tx_short_prepare(struct meth_private *priv, desc->header.raw = METH_TX_CMD_INT_EN | (len-1) | ((128-len) << 16); /* maybe I should set whole thing to 0 first... 
*/ - memcpy(desc->data.dt + (120 - len), skb->data, skb->len); + skb_copy_from_linear_data(skb, desc->data.dt + (120 - len), skb->len); if (skb->len < len) memset(desc->data.dt + 120 - len + skb->len, 0, len-skb->len); } @@ -627,8 +626,8 @@ static void meth_tx_1page_prepare(struct meth_private *priv, /* unaligned part */ if (unaligned_len) { - memcpy(desc->data.dt + (120 - unaligned_len), - skb->data, unaligned_len); + skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len), + unaligned_len); desc->header.raw |= (128 - unaligned_len) << 16; } @@ -653,8 +652,8 @@ static void meth_tx_2page_prepare(struct meth_private *priv, desc->header.raw = METH_TX_CMD_INT_EN | TX_CATBUF1 | TX_CATBUF2| (skb->len - 1); /* unaligned part */ if (unaligned_len){ - memcpy(desc->data.dt + (120 - unaligned_len), - skb->data, unaligned_len); + skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len), + unaligned_len); desc->header.raw |= (128 - unaligned_len) << 16; } diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c index f42b9e20193..403f63afd20 100644 --- a/drivers/net/mipsnet.c +++ b/drivers/net/mipsnet.c @@ -101,7 +101,6 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count) if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len)) return -EFAULT; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 8015a7c5b0c..ab15ecd4b3d 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -434,7 +434,6 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget) * received packet */ skb_put(skb, pkt_info.byte_cnt - 4); - skb->dev = dev; if (pkt_info.cmd_sts & ETH_LAYER_4_CHECKSUM_OK) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1162,15 +1161,15 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | ETH_GEN_IP_V_4_CHECKSUM | - skb->nh.iph->ihl << ETH_TX_IHL_SHIFT; + ip_hdr(skb)->ihl << ETH_TX_IHL_SHIFT; - switch (skb->nh.iph->protocol) { + switch (ip_hdr(skb)->protocol) { case IPPROTO_UDP: cmd_sts |= ETH_UDP_FRAME; - desc->l4i_chk = skb->h.uh->check; + desc->l4i_chk = udp_hdr(skb)->check; break; case IPPROTO_TCP: - desc->l4i_chk = skb->h.th->check; + desc->l4i_chk = tcp_hdr(skb)->check; break; default: BUG(); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index f8efe0e70a6..16e3c4315e8 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -879,7 +879,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va, * skb_pull() (for ether_pad and eth_type_trans()) requires * the beginning of the packet in skb_headlen(), move it * manually */ - memcpy(skb->data, va, hlen); + skb_copy_to_linear_data(skb, va, hlen); skb_shinfo(skb)->frags[0].page_offset += hlen; skb_shinfo(skb)->frags[0].size -= hlen; skb->data_len -= hlen; @@ -1020,7 +1020,6 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, skb_shinfo(skb)->nr_frags = 0; } skb->protocol = eth_type_trans(skb, dev); - skb->dev = dev; if (mgp->csum_flag) { if ((skb->protocol == htons(ETH_P_IP)) || @@ -2030,7 +2029,7 @@ again: odd_flag = 0; flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { - cksum_offset = (skb->h.raw - skb->data); + cksum_offset = skb_transport_offset(skb); pseudo_hdr_offset = cksum_offset + skb->csum_offset; /* If the headers are excessively large, then we must * fall 
back to a software checksum */ @@ -2055,7 +2054,7 @@ again: * send loop that we are still in the * header portion of the TSO packet. * TSO header must be at most 134 bytes long */ - cum_len = -((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); + cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); /* for TSO, pseudo_hdr_offset holds mss. * The firmware figures out where to put diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c index ee26ef52289..13444da9327 100644 --- a/drivers/net/myri_sbus.c +++ b/drivers/net/myri_sbus.c @@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw = (((unsigned char *)skb->data) + MYRI_PAD_LEN); + skb_set_mac_header(skb, MYRI_PAD_LEN); skb_pull(skb, dev->hard_header_len); eth = eth_hdr(skb); @@ -502,7 +502,7 @@ static void myri_rx(struct myri_eth *mp, struct net_device *dev) copy_skb->dev = dev; DRX(("resv_and_put ")); skb_put(copy_skb, len); - memcpy(copy_skb->data, skb->data, len); + skb_copy_from_linear_data(skb, copy_skb->data, len); /* Reuse original ring buffer. */ DRX(("reuse ")); diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 349b96a3ec4..a8d7ff2c96a 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -2289,7 +2289,6 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do) * without copying to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + RX_OFFSET)) != NULL) { - skb->dev = dev; /* 16 byte align the IP header */ skb_reserve(skb, RX_OFFSET); pci_dma_sync_single_for_cpu(np->pci_dev, diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c index a53644f6a29..2b8da0a5499 100644 --- a/drivers/net/netx-eth.c +++ b/drivers/net/netx-eth.c @@ -168,7 +168,6 @@ static void netx_eth_receive(struct net_device *ndev) FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno)); ndev->last_rx = jiffies; - skb->dev = ndev; skb->protocol = eth_type_trans(skb, ndev); netif_rx(skb); priv->stats.rx_packets++; diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index 6537574a9cd..0fba8f19076 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -35,6 +35,8 @@ #include "netxen_nic_hw.h" #include "netxen_nic_phan_reg.h" +#include <net/ip.h> + /* PCI Windowing for DDR regions. 
*/ #define ADDR_IN_RANGE(addr, low, high) \ @@ -371,22 +373,21 @@ void netxen_tso_check(struct netxen_adapter *adapter, struct cmd_desc_type0 *desc, struct sk_buff *skb) { if (desc->mss) { - desc->total_hdr_length = sizeof(struct ethhdr) + - ((skb->nh.iph)->ihl * sizeof(u32)) + - ((skb->h.th)->doff * sizeof(u32)); + desc->total_hdr_length = (sizeof(struct ethhdr) + + ip_hdrlen(skb) + tcp_hdrlen(skb)); netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb->nh.iph->protocol == IPPROTO_TCP) { + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT); - } else if (skb->nh.iph->protocol == IPPROTO_UDP) { + } else if (ip_hdr(skb)->protocol == IPPROTO_UDP) { netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT); } else { return; } } adapter->stats.xmitcsummed++; - desc->tcp_hdr_offset = skb->h.raw - skb->data; - desc->ip_hdr_offset = skb->nh.raw - skb->data; + desc->tcp_hdr_offset = skb_transport_offset(skb); + desc->ip_hdr_offset = skb_network_offset(skb); } int netxen_is_flash_supported(struct netxen_adapter *adapter) diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index eff965dc5ff..5cd40562da7 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1129,7 +1129,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid, port->stats.csummed++; skb->ip_summed = CHECKSUM_UNNECESSARY; } - skb->dev = netdev; if (desc_ctx == RCV_DESC_LRO_CTXID) { /* True length was only available on the last pkt */ skb_put(skb, buffer->lro_length); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 7d2525e76ab..ab25c225a07 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -41,6 +41,7 @@ #include <linux/dma-mapping.h> #include <linux/vmalloc.h> +#include <net/ip.h> MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver"); MODULE_LICENSE("GPL"); @@ -778,9 +779,8 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (skb_shinfo(skb)->gso_size > 0) { no_of_desc++; - if (((skb->nh.iph)->ihl * sizeof(u32)) + - ((skb->h.th)->doff * sizeof(u32)) + - sizeof(struct ethhdr) > + if ((ip_hdrlen(skb) + tcp_hdrlen(skb) + + sizeof(struct ethhdr)) > (sizeof(struct cmd_desc_type0) - 2)) { no_of_desc++; } @@ -920,8 +920,10 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* copy the next 64 bytes - should be enough except * for pathological case */ - memcpy((void *)hwdesc, (void *)(skb->data) + - first_hdr_len, hdr_len - first_hdr_len); + skb_copy_from_linear_data_offset(skb, first_hdr_len, + hwdesc, + (hdr_len - + first_hdr_len)); producer = get_next_index(producer, max_tx_desc_count); } } diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c index 8be0d030d6f..3d5b4232f65 100644 --- a/drivers/net/ni5010.c +++ b/drivers/net/ni5010.c @@ -562,7 +562,6 @@ static void ni5010_rx(struct net_device *dev) return; } - skb->dev = dev; skb_reserve(skb, 2); /* Read packet into buffer */ diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c index a6f4b24b017..8dbd6d1900b 100644 --- a/drivers/net/ni52.c +++ b/drivers/net/ni52.c @@ -934,7 +934,6 @@ static void ni52_rcv_int(struct net_device *dev) skb = (struct sk_buff *) dev_alloc_skb(totlen+2); if(skb != NULL) { - skb->dev = dev; skb_reserve(skb,2); skb_put(skb,totlen); eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0); @@ -1183,7 +1182,7 
@@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev) else #endif { - memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len); + skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len); len = skb->len; if (len < ETH_ZLEN) { len = ETH_ZLEN; diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c index 1578f4d9849..3818edf0ac1 100644 --- a/drivers/net/ni65.c +++ b/drivers/net/ni65.c @@ -610,7 +610,6 @@ static void *ni65_alloc_mem(struct net_device *dev,char *what,int size,int type) printk(KERN_WARNING "%s: unable to allocate %s memory.\n",dev->name,what); return NULL; } - skb->dev = dev; skb_reserve(skb,2+16); skb_put(skb,R_BUF_SIZE); /* grab the whole space .. (not necessary) */ ptr = skb->data; @@ -1094,7 +1093,6 @@ static void ni65_recv_intr(struct net_device *dev,int csr0) if(skb) { skb_reserve(skb,2); - skb->dev = dev; #ifdef RCV_VIA_SKB if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) { skb_put(skb,len); @@ -1178,8 +1176,9 @@ static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev) if( (unsigned long) (skb->data + skb->len) > 0x1000000) { #endif - memcpy((char *) p->tmdbounce[p->tmdbouncenum] ,(char *)skb->data, - (skb->len > T_BUF_SIZE) ? T_BUF_SIZE : skb->len); + skb_copy_from_linear_data(skb, p->tmdbounce[p->tmdbouncenum], + skb->len > T_BUF_SIZE ? T_BUF_SIZE : + skb->len); if (len > skb->len) memset((char *)p->tmdbounce[p->tmdbouncenum]+skb->len, 0, len-skb->len); dev_kfree_skb (skb); diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 9ec6e9e54f4..6a32338623f 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -607,7 +607,6 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp) res &= 0xf; skb_reserve(skb, res); - skb->dev = ndev; if (gfp != GFP_ATOMIC) spin_lock_irqsave(&dev->rx_info.lock, flags); res = ns83820_add_rx_skb(dev, skb); @@ -1157,9 +1156,9 @@ again: extsts = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) { extsts |= EXTSTS_IPPKT; - if (IPPROTO_TCP == skb->nh.iph->protocol) + if (IPPROTO_TCP == ip_hdr(skb)->protocol) extsts |= EXTSTS_TCPPKT; - else if (IPPROTO_UDP == skb->nh.iph->protocol) + else if (IPPROTO_UDP == ip_hdr(skb)->protocol) extsts |= EXTSTS_UDPPKT; } diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index d670ac74824..76fe9dd8e84 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -334,8 +334,6 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev) break; } - skb->dev = dev; - dma = pci_map_single(mac->dma_pdev, skb->data, skb->len, PCI_DMA_FROMDEVICE); @@ -731,16 +729,18 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD; if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (skb->nh.iph->protocol) { + const unsigned char *nh = skb_network_header(skb); + + switch (ip_hdr(skb)->protocol) { case IPPROTO_TCP: dflags |= XCT_MACTX_CSUM_TCP; - dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2); - dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data); + dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2); + dflags |= XCT_MACTX_IPO(nh - skb->data); break; case IPPROTO_UDP: dflags |= XCT_MACTX_CSUM_UDP; - dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2); - dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data); + dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2); + dflags |= XCT_MACTX_IPO(nh - skb->data); break; } } diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c 
index 6ca4e4fa6b8..df8998b4f37 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -1344,7 +1344,7 @@ static int netdrv_start_xmit (struct sk_buff *skb, struct net_device *dev) tp->tx_info[entry].skb = skb; /* tp->tx_info[entry].mapping = 0; */ - memcpy (tp->tx_buf[entry], skb->data, skb->len); + skb_copy_from_linear_data(skb, tp->tx_buf[entry], skb->len); /* Note: the chip doesn't have auto-pad! */ NETDRV_W32 (TxStatus0 + (entry * sizeof(u32)), @@ -1565,7 +1565,6 @@ static void netdrv_rx_interrupt (struct net_device *dev, skb = dev_alloc_skb (pkt_size + 2); if (skb) { - skb->dev = dev; skb_reserve (skb, 2); /* 16 byte align the IP fields. */ eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index c7bd9c1c7f3..2b395ee21f7 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -1056,7 +1056,6 @@ static int el3_rx(struct net_device *dev, int worklimit) DEBUG(3, " Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len), ((pkt_len+3)>>2)); diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 461e8274ef6..143ae2ff309 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -883,7 +883,6 @@ static int el3_rx(struct net_device *dev) DEBUG(3, " Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { - skb->dev = dev; skb_reserve(skb, 2); insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len), (pkt_len+3)>>2); diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 6139048f811..808fae1577e 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -1136,7 +1136,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) ei_block_output(dev, length, skb->data, output_page); else { memset(packet, 0, ETH_ZLEN); - memcpy(packet, skb->data, skb->len); + skb_copy_from_linear_data(skb, packet, skb->len); ei_block_output(dev, length, packet, output_page); } @@ -1496,7 +1496,6 @@ static void ei_receive(struct net_device *dev) else { skb_reserve(skb,2); /* IP headers on 16 byte boundaries */ - skb->dev = dev; skb_put(skb, pkt_len); /* Make room */ ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame)); skb->protocol=eth_type_trans(skb,dev); diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 0d7de617e53..3f93d493323 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -999,7 +999,6 @@ static void fjn_rx(struct net_device *dev) lp->stats.rx_dropped++; break; } - skb->dev = dev; skb_reserve(skb, 2); insw(ioaddr + DATAPORT, skb_put(skb, pkt_len), diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 3b707747a81..73da611fd53 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -1182,12 +1182,10 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) skb = dev_alloc_skb(pkt_len+2); if (skb != NULL) { - skb->dev = dev; - skb_reserve(skb, 2); insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1); if (pkt_len & 1) - *(skb->tail-1) = inb(ioaddr + AM2150_RCV); + *(skb_tail_pointer(skb) - 1) = inb(ioaddr + AM2150_RCV); skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); /* Send the packet to the upper (protocol) layers. 
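A recurring change in the receive paths above (loopback, mace, mipsnet, nmclan and others) is the deletion of "skb->dev = dev;" just before eth_type_trans() or netif_rx(). This is safe because eth_type_trans() already records the device; an abridged sketch of its opening lines as they stand after this series (net/ethernet/eth.c, protocol classification elided):

__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
{
	struct ethhdr *eth;

	skb->dev = dev;			/* makes the per-driver stores redundant */
	skb_reset_mac_header(skb);
	skb_pull(skb, ETH_HLEN);
	eth = eth_hdr(skb);
	/* ... protocol classification elided ... */
	return eth->h_proto;
}

Drivers that do not go through eth_type_trans(), such as the HIPPI and FDDI paths (rrunner, skfddi), rely on the equivalent assignment inside hippi_type_trans()/fddi_type_trans().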
*/ diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 2561f76033e..7912dbd1425 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -1669,7 +1669,6 @@ static void smc_rx(struct net_device *dev) (packet_length+1)>>1); skb->protocol = eth_type_trans(skb, dev); - skb->dev = dev; netif_rx(skb); dev->last_rx = jiffies; smc->stats.rx_packets++; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 5879e7c3698..809ec440b8e 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -1226,7 +1226,6 @@ xirc2ps_interrupt(int irq, void *dev_id) (pktlen+1)>>1); } skb->protocol = eth_type_trans(skb, dev); - skb->dev = dev; netif_rx(skb); dev->last_rx = jiffies; lp->stats.rx_packets++; diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 4d94ba7899b..0791360a6a6 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -1206,7 +1206,6 @@ static void pcnet32_rx_entry(struct net_device *dev, PCI_DMA_FROMDEVICE); skb_put(skb, pkt_len); lp->rx_skbuff[entry] = newskb; - newskb->dev = dev; lp->rx_dma_addr[entry] = pci_map_single(lp->pci_dev, newskb->data, diff --git a/drivers/net/plip.c b/drivers/net/plip.c index 6bb085f5443..8754cf3356b 100644 --- a/drivers/net/plip.c +++ b/drivers/net/plip.c @@ -546,7 +546,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev) struct ethhdr *eth; unsigned char *rawp; - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); skb_pull(skb,dev->hard_header_len); eth = eth_hdr(skb); diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index ef58e412878..6d596ca50cf 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -88,8 +88,6 @@ struct ppp_file { #define PF_TO_PPP(pf) PF_TO_X(pf, struct ppp) #define PF_TO_CHANNEL(pf) PF_TO_X(pf, struct channel) -#define ROUNDUP(n, x) (((n) + (x) - 1) / (x)) - /* * Data structure describing one ppp unit. * A ppp unit corresponds to a ppp network interface device @@ -1297,7 +1295,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) */ fragsize = len; if (nfree > 1) - fragsize = ROUNDUP(fragsize, nfree); + fragsize = DIV_ROUND_UP(fragsize, nfree); /* nbigger channels get fragsize bytes, the rest get fragsize-1, except if nbigger==0, then they all get fragsize. 
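Just above, ppp_generic drops its private ROUNDUP macro in favor of the generic kernel helper, which computes the same ceiling division (include/linux/kernel.h):

#define DIV_ROUND_UP(n,d)	(((n) + (d) - 1) / (d))

Worked example from the fragmentation code: len = 1500 bytes over nfree = 4 channels gives DIV_ROUND_UP(1500, 4) = 375 bytes per fragment, while len = 1501 rounds up to 376 so no residue is lost.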
*/ nbigger = len % nfree; @@ -1685,7 +1683,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) skb_pull_rcsum(skb, 2); skb->dev = ppp->dev; skb->protocol = htons(npindex_to_ethertype[npi]); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); netif_rx(skb); ppp->dev->last_rx = jiffies; } diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index b6f0e9a25e2..5918fab3834 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -594,7 +594,8 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb) return NULL; } skb_reserve(npkt,2); - memcpy(skb_put(npkt,skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, + skb_put(npkt, skb->len), skb->len); kfree_skb(skb); skb = npkt; } diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ebfa2967cd6..6f98834e6ac 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -207,7 +207,7 @@ static inline struct pppox_sock *get_item(unsigned long sid, static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp) { - struct net_device *dev = NULL; + struct net_device *dev; int ifindex; dev = dev_get_by_name(sp->sa_addr.pppoe.dev); @@ -218,20 +218,6 @@ static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp) return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } -static inline int set_item(struct pppox_sock *po) -{ - int i; - - if (!po) - return -EINVAL; - - write_lock_bh(&pppoe_hash_lock); - i = __set_item(po); - write_unlock_bh(&pppoe_hash_lock); - - return i; -} - static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex) { struct pppox_sock *ret; @@ -255,54 +241,53 @@ static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int static void pppoe_flush_dev(struct net_device *dev) { int hash; - BUG_ON(dev == NULL); - read_lock_bh(&pppoe_hash_lock); + write_lock_bh(&pppoe_hash_lock); for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) { struct pppox_sock *po = item_hash_table[hash]; while (po != NULL) { - if (po->pppoe_dev == dev) { - struct sock *sk = sk_pppox(po); - - sock_hold(sk); - po->pppoe_dev = NULL; + struct sock *sk = sk_pppox(po); + if (po->pppoe_dev != dev) { + po = po->next; + continue; + } + po->pppoe_dev = NULL; + dev_put(dev); - /* We hold a reference to SK, now drop the - * hash table lock so that we may attempt - * to lock the socket (which can sleep). - */ - read_unlock_bh(&pppoe_hash_lock); - lock_sock(sk); + /* We always grab the socket lock, followed by the + * pppoe_hash_lock, in that order. Since we should + * hold the sock lock while doing any unbinding, + * we need to release the lock we're holding. + * Hold a reference to the sock so it doesn't disappear + * as we're jumping between locks. + */ - if (sk->sk_state & - (PPPOX_CONNECTED | PPPOX_BOUND)) { - pppox_unbind_sock(sk); - dev_put(dev); - sk->sk_state = PPPOX_ZOMBIE; - sk->sk_state_change(sk); - } + sock_hold(sk); - release_sock(sk); + write_unlock_bh(&pppoe_hash_lock); + lock_sock(sk); - sock_put(sk); + if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_ZOMBIE; + sk->sk_state_change(sk); + } - read_lock_bh(&pppoe_hash_lock); + release_sock(sk); + sock_put(sk); - /* Now restart from the beginning of this - * hash chain. We always NULL out pppoe_dev - * so we are guaranteed to make forward - * progress. - */ - po = item_hash_table[hash]; - continue; - } - po = po->next; + /* Restart scan at the beginning of this hash chain. 
+ * While the lock was dropped the chain contents may + * have changed. + */ + write_lock_bh(&pppoe_hash_lock); + po = item_hash_table[hash]; } } - read_unlock_bh(&pppoe_hash_lock); + write_unlock_bh(&pppoe_hash_lock); } static int pppoe_device_event(struct notifier_block *this, @@ -344,10 +329,10 @@ static struct notifier_block pppoe_notifier = { static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); - struct pppox_sock *relay_po = NULL; + struct pppox_sock *relay_po; if (sk->sk_state & PPPOX_BOUND) { - struct pppoe_hdr *ph = (struct pppoe_hdr *) skb->nh.raw; + struct pppoe_hdr *ph = pppoe_hdr(skb); int len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(struct pppoe_hdr)); if (pskb_trim_rcsum(skb, len)) @@ -401,7 +386,7 @@ static int pppoe_rcv(struct sk_buff *skb, if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; - ph = (struct pppoe_hdr *) skb->nh.raw; + ph = pppoe_hdr(skb); po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po != NULL) @@ -433,7 +418,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb, if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; - ph = (struct pppoe_hdr *) skb->nh.raw; + ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; @@ -514,36 +499,49 @@ static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; - int error = 0; if (!sk) return 0; - if (sock_flag(sk, SOCK_DEAD)) + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD)){ + release_sock(sk); return -EBADF; + } pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; + + /* Write lock on hash lock protects the entire "po" struct from + * concurrent updates via pppoe_flush_dev. The "po" struct should + * be considered part of the hash table contents, thus protected + * by the hash table lock */ + write_lock_bh(&pppoe_hash_lock); + po = pppox_sk(sk); if (po->pppoe_pa.sid) { - delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); + __delete_item(po->pppoe_pa.sid, + po->pppoe_pa.remote, po->pppoe_ifindex); } - if (po->pppoe_dev) + if (po->pppoe_dev) { dev_put(po->pppoe_dev); + po->pppoe_dev = NULL; + } - po->pppoe_dev = NULL; + write_unlock_bh(&pppoe_hash_lock); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_receive_queue); + release_sock(sk); sock_put(sk); - return error; + return 0; } @@ -599,14 +597,18 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; - if (!(dev->flags & IFF_UP)) + write_lock_bh(&pppoe_hash_lock); + if (!(dev->flags & IFF_UP)){ + write_unlock_bh(&pppoe_hash_lock); goto err_put; + } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); - error = set_item(po); + error = __set_item(po); + write_unlock_bh(&pppoe_hash_lock); if (error < 0) goto err_put; @@ -762,10 +764,10 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); - int error = 0; + int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; @@ -799,7 +801,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock, /* Reserve space for headers. 
*/ skb_reserve(skb, dev->hard_header_len); - skb->nh.raw = skb->data; + skb_reset_network_header(skb); skb->dev = dev; @@ -869,7 +871,8 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) goto abort; skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr)); - memcpy(skb_put(skb2, skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, skb_put(skb2, skb->len), + skb->len); } else { /* Make a clone so as to not disturb the original skb, * give dev_queue_xmit something it can free. @@ -884,7 +887,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); skb2->protocol = __constant_htons(ETH_P_PPP_SES); - skb2->nh.raw = skb2->data; + skb_reset_network_header(skb2); skb2->dev = dev; @@ -929,10 +932,8 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; - struct sk_buff *skb = NULL; + struct sk_buff *skb; int error = 0; - int len; - struct pppoe_hdr *ph = NULL; if (sk->sk_state & PPPOX_BOUND) { error = -EIO; @@ -942,26 +943,21 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &error); - if (error < 0) { + if (error < 0) goto end; - } m->msg_namelen = 0; if (skb) { - error = 0; - ph = (struct pppoe_hdr *) skb->nh.raw; - len = ntohs(ph->length); + struct pppoe_hdr *ph = pppoe_hdr(skb); + const int len = ntohs(ph->length); error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len); - if (error < 0) - goto do_skb_free; - error = len; + if (error == 0) + error = len; } -do_skb_free: - if (skb) - kfree_skb(skb); + kfree_skb(skb); end: return error; } @@ -991,7 +987,7 @@ out: static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos) { - struct pppox_sock *po = NULL; + struct pppox_sock *po; int i = 0; for (; i < PPPOE_HASH_SIZE; i++) { diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c index 9315046b3f5..3f8115db4d5 100644 --- a/drivers/net/pppox.c +++ b/drivers/net/pppox.c @@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk) { /* Clear connection to ppp device, if attached. */ - if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE)) { + if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) { ppp_unregister_channel(&pppox_sk(sk)->chan); sk->sk_state = PPPOX_DEAD; } diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index a8246eb2f8d..7b80fb7a9d9 100755 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -1873,7 +1873,6 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev, pci_unmap_len(lrg_buf_cb2, maplen), PCI_DMA_FROMDEVICE); prefetch(skb->data); - skb->dev = qdev->ndev; skb->ip_summed = CHECKSUM_NONE; skb->protocol = eth_type_trans(skb, qdev->ndev); @@ -1928,7 +1927,8 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, * Copy the ethhdr from first buffer to second. This * is necessary for 3022 IP completions. 
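The memcpy conversions here (__pppoe_xmit above, qla3xxx just below) and throughout this series go through thin, self-describing wrappers over the skb linear data area; a sketch of the include/linux/skbuff.h definitions:

static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
					     void *to, const unsigned int len)
{
	memcpy(to, skb->data, len);
}

static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
						    const int offset, void *to,
						    const unsigned int len)
{
	memcpy(to, skb->data + offset, len);
}

static inline void skb_copy_to_linear_data(struct sk_buff *skb,
					   const void *from,
					   const unsigned int len)
{
	memcpy(skb->data, from, len);
}

Behavior is identical to the open-coded memcpy calls they replace; the point is to give future skb layout changes a single choke point.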
*/ - memcpy(skb_push(skb2, size), skb1->data + VLAN_ID_LEN, size); + skb_copy_from_linear_data_offset(skb1, VLAN_ID_LEN, + skb_push(skb2, size), size); } else { u16 checksum = le16_to_cpu(ib_ip_rsp_ptr->checksum); if (checksum & @@ -1946,7 +1946,6 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, skb2->ip_summed = CHECKSUM_UNNECESSARY; } } - skb2->dev = qdev->ndev; skb2->protocol = eth_type_trans(skb2, qdev->ndev); netif_receive_skb(skb2); diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 6a77b8a9224..45876a854f0 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2284,7 +2284,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) return LargeSend | ((mss & MSSMask) << MSSShift); } if (skb->ip_summed == CHECKSUM_PARTIAL) { - const struct iphdr *ip = skb->nh.iph; + const struct iphdr *ip = ip_hdr(skb); if (ip->protocol == IPPROTO_TCP) return IPCS | TCPCS; @@ -2586,7 +2586,6 @@ rtl8169_rx_interrupt(struct net_device *dev, struct rtl8169_private *tp, pci_action(tp->pci_dev, le64_to_cpu(desc->addr), tp->rx_buf_sz, PCI_DMA_FROMDEVICE); - skb->dev = dev; skb_put(skb, pkt_size); skb->protocol = eth_type_trans(skb, dev); diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index b7ff484af3e..df6b73872fd 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -115,7 +115,6 @@ static int rionet_rx_clean(struct net_device *ndev) rnet->rx_skb[i]->data = data; skb_put(rnet->rx_skb[i], RIO_MAX_MSG_SIZE); - rnet->rx_skb[i]->dev = ndev; rnet->rx_skb[i]->protocol = eth_type_trans(rnet->rx_skb[i], ndev); error = netif_rx(rnet->rx_skb[i]); diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index d81536f90df..25c73d47daa 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1029,7 +1029,6 @@ static void rx_int(struct net_device *dev, u32 rxlimit, u32 index) goto defer; } } - skb->dev = dev; skb->protocol = hippi_type_trans(skb, dev); netif_rx(skb); /* send it up */ @@ -1452,7 +1451,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) } skb_reserve(new_skb, 8); skb_put(new_skb, len); - memcpy(new_skb->data, skb->data, len); + skb_copy_from_linear_data(skb, new_skb->data, len); dev_kfree_skb(skb); skb = new_skb; } diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 46ebf141ee5..600d3ff347f 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -2195,7 +2195,7 @@ static int fill_rxd_3buf(struct s2io_nic *nic, struct RxD_t *rxdp, struct \ frag_list->next = NULL; tmp = (void *)ALIGN((long)frag_list->data, ALIGN_SIZE + 1); frag_list->data = tmp; - frag_list->tail = tmp; + skb_reset_tail_pointer(frag_list); /* Buffer-2 receives L4 data payload */ ((struct RxD3*)rxdp)->Buffer2_ptr = pci_map_single(nic->pdev, @@ -2349,7 +2349,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no) tmp += ALIGN_SIZE; tmp &= ~ALIGN_SIZE; skb->data = (void *) (unsigned long)tmp; - skb->tail = (void *) (unsigned long)tmp; + skb_reset_tail_pointer(skb); if (!(((struct RxD3*)rxdp)->Buffer0_ptr)) ((struct RxD3*)rxdp)->Buffer0_ptr = diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c index 143958f1ef0..ad94358ece8 100644 --- a/drivers/net/saa9730.c +++ b/drivers/net/saa9730.c @@ -688,7 +688,6 @@ static int lan_saa9730_rx(struct net_device *dev) } else { lp->stats.rx_bytes += len; lp->stats.rx_packets++; - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ eth_copy_and_sum(skb, diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 
b9fa4fbb139..1de3eec1a79 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -834,7 +834,7 @@ printk("cm0: IP identification: %02x%02x fragment offset: %02x%02x\n", buffer[3 goto dropped_frame; } skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = (unsigned short) buffer[NewDatagramHeaderSkip + 16]; insw(ioaddr, skb_put(skb, NewDatagramDataSize), NewDatagramDataSize / 2); diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 103c3174ab5..0a3a379b634 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -933,9 +933,6 @@ static int sbdma_add_rcvbuffer(sbmacdma_t *d,struct sk_buff *sb) } sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN); - - /* mark skbuff owned by our device */ - sb_new->dev = d->sbdma_eth->sbm_dev; } else { sb_new = sb; diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c index c32c21af3fd..5b7284c955d 100644 --- a/drivers/net/sc92031.c +++ b/drivers/net/sc92031.c @@ -814,7 +814,6 @@ static void _sc92031_rx_tasklet(struct net_device *dev) memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size); } - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); dev->last_rx = jiffies; netif_rx(skb); diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index 0d6c95c7aed..4bce7c4f373 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -550,7 +550,6 @@ static void seeq8005_rx(struct net_device *dev) lp->stats.rx_dropped++; break; } - skb->dev = dev; skb_reserve(skb, 2); /* align data on 16 byte */ buf = skb_put(skb,pkt_len); diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c index 52ed522a234..d8c9c5d66d4 100644 --- a/drivers/net/sgiseeq.c +++ b/drivers/net/sgiseeq.c @@ -318,7 +318,6 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp skb = dev_alloc_skb(len + 2); if (skb) { - skb->dev = dev; skb_reserve(skb, 2); skb_put(skb, len); @@ -535,7 +534,7 @@ static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev) * entry and the HPC got to the end of the chain before we * added this new entry and restarted it. 
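sb1000 above, like plip, slip and ppp_generic elsewhere in this section, stops writing skb->mac.raw directly and uses the mac-header setters instead. Sketch of the definitions in the pointer-based (non-offset) skb build of this era:

static inline void skb_reset_mac_header(struct sk_buff *skb)
{
	skb->mac_header = skb->data;
}

static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
{
	skb_reset_mac_header(skb);
	skb->mac_header += offset;
}

This keeps the earlier myri_sbus change (skb_set_mac_header(skb, MYRI_PAD_LEN)) and the loopback one (skb_set_mac_header(nskb, -ETH_HLEN)) byte-for-byte equivalent to the pointer arithmetic they replace, negative offsets included.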
*/ - memcpy((char *)(long)td->buf_vaddr, skb->data, skblen); + skb_copy_from_linear_data(skb, (char *)(long)td->buf_vaddr, skblen); if (len != skblen) memset((char *)(long)td->buf_vaddr + skb->len, 0, len-skblen); td->tdma.cntinfo = (len & HPCDMA_BCNT) | diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 34463ce6f13..bc8de48da31 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -632,7 +632,6 @@ static int sis190_rx_interrupt(struct net_device *dev, pci_action(tp->pci_dev, le32_to_cpu(desc->addr), tp->rx_buf_sz, PCI_DMA_FROMDEVICE); - skb->dev = dev; skb_put(skb, pkt_size); skb->protocol = eth_type_trans(skb, dev); diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index b2a3b19d773..dea0126723d 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -1160,7 +1160,6 @@ sis900_init_rx_ring(struct net_device *net_dev) buffer */ break; } - skb->dev = net_dev; sis_priv->rx_skbuff[i] = skb; sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE; sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev, @@ -1800,7 +1799,6 @@ static int sis900_rx(struct net_device *net_dev) sis_priv->stats.rx_packets++; sis_priv->dirty_rx++; refill_rx_ring: - skb->dev = net_dev; sis_priv->rx_skbuff[entry] = skb; sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE; sis_priv->rx_ring[entry].bufptr = @@ -1832,7 +1830,6 @@ refill_rx_ring: sis_priv->stats.rx_dropped++; break; } - skb->dev = net_dev; sis_priv->rx_skbuff[entry] = skb; sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE; sis_priv->rx_ring[entry].bufptr = diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index e94ab256b54..e0a93005e6d 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1562,10 +1562,10 @@ struct sk_buff *pMessage) /* pointer to send-message */ pTxd->pMBuf = pMessage; if (pMessage->ip_summed == CHECKSUM_PARTIAL) { - u16 hdrlen = pMessage->h.raw - pMessage->data; + u16 hdrlen = skb_transport_offset(pMessage); u16 offset = hdrlen + pMessage->csum_offset; - if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) && + if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) && (pAC->GIni.GIChipRev == 0) && (pAC->GIni.GIChipId == CHIP_ID_YUKON)) { pTxd->TBControl = BMU_TCP_CHECK; @@ -1681,7 +1681,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ ** Does the HW need to evaluate checksum for TCP or UDP packets? 
*/ if (pMessage->ip_summed == CHECKSUM_PARTIAL) { - u16 hdrlen = pMessage->h.raw - pMessage->data; + u16 hdrlen = skb_transport_offset(pMessage); u16 offset = hdrlen + pMessage->csum_offset; Control = BMU_STFWD; @@ -1691,7 +1691,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ ** opcode for udp is not working in the hardware yet ** (Revision 2.0) */ - if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) && + if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) && (pAC->GIni.GIChipRev == 0) && (pAC->GIni.GIChipId == CHIP_ID_YUKON)) { Control |= BMU_TCP_CHECK; @@ -2127,7 +2127,7 @@ rx_start: (dma_addr_t) PhysAddr, FrameLength, PCI_DMA_FROMDEVICE); - memcpy(pNewMsg->data, pMsg, FrameLength); + skb_copy_to_linear_data(pNewMsg, pMsg, FrameLength); pci_dma_sync_single_for_device(pAC->PciDev, (dma_addr_t) PhysAddr, @@ -2193,7 +2193,6 @@ rx_start: SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC, FrameLength, pRxPort->PortIndex); - pMsg->dev = pAC->dev[pRxPort->PortIndex]; pMsg->protocol = eth_type_trans(pMsg, pAC->dev[pRxPort->PortIndex]); netif_rx(pMsg); @@ -2246,7 +2245,6 @@ rx_start: (IFF_PROMISC | IFF_ALLMULTI)) != 0 || (ForRlmt & SK_RLMT_RX_PROTOCOL) == SK_RLMT_RX_PROTOCOL) { - pMsg->dev = pAC->dev[pRxPort->PortIndex]; pMsg->protocol = eth_type_trans(pMsg, pAC->dev[pRxPort->PortIndex]); netif_rx(pMsg); diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 9733a11c614..a7ef6c8b772 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -1680,7 +1680,6 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd, rxd->rxd_os.skb = NULL; skb_trim(skb, len); skb->protocol = fddi_type_trans(skb, bp->dev); - skb->dev = bp->dev; /* pass up device pointer */ netif_rx(skb); bp->dev->last_rx = jiffies; @@ -1938,7 +1937,7 @@ int mac_drv_rx_init(struct s_smc *smc, int len, int fc, } skb_reserve(skb, 3); skb_put(skb, len); - memcpy(skb->data, look_ahead, len); + skb_copy_to_linear_data(skb, look_ahead, len); // deliver frame to system skb->protocol = fddi_type_trans(skb, smc->os.dev); diff --git a/drivers/net/skge.c b/drivers/net/skge.c index d476a3cc2e9..f1a0e6c0fbd 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2654,12 +2654,12 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) td->dma_hi = map >> 32; if (skb->ip_summed == CHECKSUM_PARTIAL) { - int offset = skb->h.raw - skb->data; + const int offset = skb_transport_offset(skb); /* This seems backwards, but it is what the sk98lin * does. Looks like hardware is wrong? 
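The checksum-offload hunks in sk98lin and skge stop computing header offsets by hand (pMessage->h.raw - pMessage->data) and use the accessor instead. Sketch of the era's definitions in the pointer-based build (the offset-based build stores a numeric offset and adds skb->head):

static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
{
	return skb->transport_header;
}

static inline int skb_transport_offset(const struct sk_buff *skb)
{
	return skb_transport_header(skb) - skb->data;
}

ipip_hdr(skb), used in the same hunks, is the matching typed accessor: it casts skb_transport_header(skb) to struct iphdr *, mirroring the old h.ipiph union member.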
*/ - if (skb->h.ipiph->protocol == IPPROTO_UDP + if (ipip_hdr(skb)->protocol == IPPROTO_UDP && hw->chip_rev == 0 && hw->chip_id == CHIP_ID_YUKON) control = BMU_TCP_CHECK; else @@ -2950,7 +2950,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev, pci_dma_sync_single_for_cpu(skge->hw->pdev, pci_unmap_addr(e, mapaddr), len, PCI_DMA_FROMDEVICE); - memcpy(skb->data, e->skb->data, len); + skb_copy_from_linear_data(e->skb, skb->data, len); pci_dma_sync_single_for_device(skge->hw->pdev, pci_unmap_addr(e, mapaddr), len, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index ac36152c68b..238c2ca34da 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -32,6 +32,7 @@ #include <linux/ethtool.h> #include <linux/pci.h> #include <linux/ip.h> +#include <net/ip.h> #include <linux/tcp.h> #include <linux/in.h> #include <linux/delay.h> @@ -1391,8 +1392,8 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* Check for TCP Segmentation Offload */ mss = skb_shinfo(skb)->gso_size; if (mss != 0) { - mss += ((skb->h.th->doff - 5) * 4); /* TCP options */ - mss += (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr); + mss += tcp_optlen(skb); /* TCP options */ + mss += ip_hdrlen(skb) + sizeof(struct tcphdr); mss += ETH_HLEN; if (mss != sky2->tx_last_mss) { @@ -1420,14 +1421,14 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* Handle TCP checksum offload */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - unsigned offset = skb->h.raw - skb->data; + const unsigned offset = skb_transport_offset(skb); u32 tcpsum; tcpsum = offset << 16; /* sum start */ tcpsum |= offset + skb->csum_offset; /* sum write */ ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM; - if (skb->nh.iph->protocol == IPPROTO_UDP) + if (ip_hdr(skb)->protocol == IPPROTO_UDP) ctrl |= UDPTCP; if (tcpsum != sky2->tx_tcpsum) { @@ -1970,7 +1971,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2, skb_reserve(skb, 2); pci_dma_sync_single_for_cpu(sky2->hw->pdev, re->data_addr, length, PCI_DMA_FROMDEVICE); - memcpy(skb->data, re->skb->data, length); + skb_copy_from_linear_data(re->skb, skb->data, length); skb->ip_summed = re->skb->ip_summed; skb->csum = re->skb->csum; pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr, diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 2f4b1de7a2b..65bd20fac82 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -363,7 +363,7 @@ sl_bump(struct slip *sl) } skb->dev = sl->dev; memcpy(skb_put(skb,count), sl->rbuff, count); - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); skb->protocol=htons(ETH_P_IP); netif_rx(skb); sl->dev->last_rx = jiffies; diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index c9561413198..8a2109a913b 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -502,7 +502,6 @@ static inline void smc911x_rcv(struct net_device *dev) DBG(SMC_DEBUG_PKTS, "%s: Received packet\n", dev->name,); PRINT_PKT(data, ((pkt_len - 4) <= 64) ? 
pkt_len - 4 : 64); dev->last_rx = jiffies; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); lp->stats.rx_packets++; @@ -1307,7 +1306,6 @@ smc911x_rx_dma_irq(int dma, void *data) lp->current_rx_skb = NULL; PRINT_PKT(skb->data, skb->len); dev->last_rx = jiffies; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); lp->stats.rx_packets++; diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c index bd6e84506c2..36c1ebadbf2 100644 --- a/drivers/net/smc9194.c +++ b/drivers/net/smc9194.c @@ -1262,7 +1262,6 @@ static void smc_rcv(struct net_device *dev) skb_reserve( skb, 2 ); /* 16 bit alignment */ - skb->dev = dev; data = skb_put( skb, packet_length); #ifdef USE_32_BIT diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c index 49f4b7712eb..01cc3c742c3 100644 --- a/drivers/net/smc91x.c +++ b/drivers/net/smc91x.c @@ -568,7 +568,6 @@ static inline void smc_rcv(struct net_device *dev) PRINT_PKT(data, packet_len - 4); dev->last_rx = jiffies; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); lp->stats.rx_packets++; diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c index ed7aa0a5acc..c6320c71993 100644 --- a/drivers/net/sonic.c +++ b/drivers/net/sonic.c @@ -85,7 +85,6 @@ static int sonic_open(struct net_device *dev) dev->name); return -ENOMEM; } - skb->dev = dev; /* align IP header unless DMA requires otherwise */ if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2) skb_reserve(skb, 2); @@ -451,7 +450,6 @@ static void sonic_rx(struct net_device *dev) lp->stats.rx_dropped++; break; } - new_skb->dev = dev; /* provide 16 byte IP header alignment unless DMA requires otherwise */ if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2) skb_reserve(new_skb, 2); diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c index e3019d52c30..230da14b1b6 100644 --- a/drivers/net/spider_net.c +++ b/drivers/net/spider_net.c @@ -720,7 +720,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card, spin_unlock_irqrestore(&chain->lock, flags); if (skb->protocol == htons(ETH_P_IP) && skb->ip_summed == CHECKSUM_PARTIAL) - switch (skb->nh.iph->protocol) { + switch (ip_hdr(skb)->protocol) { case IPPROTO_TCP: hwdescr->dmac_cmd_status |= SPIDER_NET_DMAC_TCP; break; @@ -990,7 +990,6 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, netdev = card->netdev; skb = descr->skb; - skb->dev = netdev; skb_put(skb, hwdescr->valid_size); /* the card seems to add 2 bytes of junk in front diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 8bba2e3da7e..9d6e454a8f9 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -1452,7 +1452,6 @@ static int __netdev_rx(struct net_device *dev, int *quota) to a minimally-sized skbuff. 
*/ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(np->pci_dev, np->rx_info[entry].mapping, diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c index 4757aa647c7..396c3d961f8 100644 --- a/drivers/net/sun3_82586.c +++ b/drivers/net/sun3_82586.c @@ -775,7 +775,6 @@ static void sun3_82586_rcv_int(struct net_device *dev) skb = (struct sk_buff *) dev_alloc_skb(totlen+2); if(skb != NULL) { - skb->dev = dev; skb_reserve(skb,2); skb_put(skb,totlen); eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0); @@ -1027,7 +1026,7 @@ static int sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev) memset((char *)p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN); len = ETH_ZLEN; } - memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len); + skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len); #if (NUM_XMIT_BUFFS == 1) # ifdef NO_NOPCOMMANDS diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c index 7bee45b42a2..791e081fdc1 100644 --- a/drivers/net/sun3lance.c +++ b/drivers/net/sun3lance.c @@ -629,7 +629,7 @@ static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) head->length = (-len) | 0xf000; head->misc = 0; - memcpy( PKTBUF_ADDR(head), (void *)skb->data, skb->len ); + skb_copy_from_linear_data(skb, PKTBUF_ADDR(head), skb->len); if (len != skb->len) memset(PKTBUF_ADDR(head) + skb->len, 0, len-skb->len); @@ -851,10 +851,9 @@ static int lance_rx( struct net_device *dev ) } - skb->dev = dev; skb_reserve( skb, 2 ); /* 16 byte align */ skb_put( skb, pkt_len ); /* Make room */ -// memcpy( skb->data, PKTBUF_ADDR(head), pkt_len ); +// skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len); eth_copy_and_sum(skb, PKTBUF_ADDR(head), pkt_len, 0); diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c index 18f88853e1e..2ad8d58dee3 100644 --- a/drivers/net/sunbmac.c +++ b/drivers/net/sunbmac.c @@ -855,7 +855,6 @@ static void bigmac_rx(struct bigmac *bp) drops++; goto drop_it; } - copy_skb->dev = bp->dev; skb_reserve(copy_skb, 2); skb_put(copy_skb, len); sbus_dma_sync_single_for_cpu(bp->bigmac_sdev, diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c index c06ecc8002b..f51ba31970a 100644 --- a/drivers/net/sundance.c +++ b/drivers/net/sundance.c @@ -1308,7 +1308,6 @@ static void rx_poll(unsigned long data) to a minimally-sized skbuff. 
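sky2 above and tg3 below swap open-coded TCP/IP header arithmetic for named helpers, which is also why sky2, tg3 and netxen newly include <net/ip.h>. Sketch of the definitions (include/net/ip.h and include/linux/tcp.h):

static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
{
	return ip_hdr(skb)->ihl * 4;		/* ihl counts 32-bit words */
}

static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
{
	return tcp_hdr(skb)->doff * 4;		/* doff counts 32-bit words */
}

static inline unsigned int tcp_optlen(const struct sk_buff *skb)
{
	return (tcp_hdr(skb)->doff - 5) * 4;	/* 5 words = 20-byte base header */
}

So the old "(skb->h.th->doff - 5) * 4" becomes tcp_optlen(skb) and "skb->nh.iph->ihl * 4" becomes ip_hdrlen(skb), with no change in the computed values.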
*/ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(np->pci_dev, desc->frag[0].addr, diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 08ea61db46f..5da73212ac9 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -64,11 +64,9 @@ #include <asm/uaccess.h> #include <asm/irq.h> -#ifdef __sparc__ +#ifdef CONFIG_SPARC #include <asm/idprom.h> -#include <asm/openprom.h> -#include <asm/oplib.h> -#include <asm/pbm.h> +#include <asm/prom.h> #endif #ifdef CONFIG_PPC_PMAC @@ -845,11 +843,10 @@ static int gem_rx(struct gem *gp, int work_to_do) goto drop_it; } - copy_skb->dev = gp->dev; skb_reserve(copy_skb, 2); skb_put(copy_skb, len); pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); - memcpy(copy_skb->data, skb->data, len); + skb_copy_from_linear_data(skb, copy_skb->data, len); pci_dma_sync_single_for_device(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); /* We'll reuse the original ring buffer. */ @@ -1029,10 +1026,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) ctrl = 0; if (skb->ip_summed == CHECKSUM_PARTIAL) { - u64 csum_start_off, csum_stuff_off; - - csum_start_off = (u64) (skb->h.raw - skb->data); - csum_stuff_off = csum_start_off + skb->csum_offset; + const u64 csum_start_off = skb_transport_offset(skb); + const u64 csum_stuff_off = csum_start_off + skb->csum_offset; ctrl = (TXDCTRL_CENAB | (csum_start_off << 15) | @@ -2849,7 +2844,7 @@ static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return rc; } -#if (!defined(__sparc__) && !defined(CONFIG_PPC_PMAC)) +#if (!defined(CONFIG_SPARC) && !defined(CONFIG_PPC_PMAC)) /* Fetch MAC address from vital product data of PCI ROM. */ static int find_eth_addr_in_vpd(void __iomem *rom_base, int len, unsigned char *dev_addr) { @@ -2904,36 +2899,19 @@ static void get_gem_mac_nonobp(struct pci_dev *pdev, unsigned char *dev_addr) static int __devinit gem_get_device_address(struct gem *gp) { -#if defined(__sparc__) || defined(CONFIG_PPC_PMAC) +#if defined(CONFIG_SPARC) || defined(CONFIG_PPC_PMAC) struct net_device *dev = gp->dev; -#endif - -#if defined(__sparc__) - struct pci_dev *pdev = gp->pdev; - struct pcidev_cookie *pcp = pdev->sysdata; - int use_idprom = 1; - - if (pcp != NULL) { - unsigned char *addr; - int len; - - addr = of_get_property(pcp->prom_node, "local-mac-address", - &len); - if (addr && len == 6) { - use_idprom = 0; - memcpy(dev->dev_addr, addr, 6); - } - } - if (use_idprom) - memcpy(dev->dev_addr, idprom->id_ethaddr, 6); -#elif defined(CONFIG_PPC_PMAC) const unsigned char *addr; addr = get_property(gp->of_node, "local-mac-address", NULL); if (addr == NULL) { +#ifdef CONFIG_SPARC + addr = idprom->id_ethaddr; +#else printk("\n"); printk(KERN_ERR "%s: can't get mac-address\n", dev->name); return -1; +#endif } memcpy(dev->dev_addr, addr, 6); #else @@ -3091,7 +3069,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev, /* On Apple, we want a reference to the Open Firmware device-tree * node. We use it for clock control. 
*/ -#ifdef CONFIG_PPC_PMAC +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC) gp->of_node = pci_device_to_OF_node(pdev); #endif diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index a70067c85cc..58cf87c5751 100644 --- a/drivers/net/sungem.h +++ b/drivers/net/sungem.h @@ -1025,7 +1025,7 @@ struct gem { struct pci_dev *pdev; struct net_device *dev; -#ifdef CONFIG_PPC_PMAC +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC) struct device_node *of_node; #endif }; diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 192bbc91c73..51c3fe2108a 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -55,9 +55,6 @@ #ifdef CONFIG_PCI #include <linux/pci.h> -#ifdef CONFIG_SPARC -#include <asm/pbm.h> -#endif #endif #include "sunhme.h" @@ -2058,11 +2055,10 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) goto drop_it; } - copy_skb->dev = dev; skb_reserve(copy_skb, 2); skb_put(copy_skb, len); hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE); - memcpy(copy_skb->data, skb->data, len); + skb_copy_from_linear_data(skb, copy_skb->data, len); hme_dma_sync_for_device(hp, dma_addr, len, DMA_FROMDEVICE); /* Reuse original ring buffer. */ @@ -2270,10 +2266,8 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_flags = TXFLAG_OWN; if (skb->ip_summed == CHECKSUM_PARTIAL) { - u32 csum_start_off, csum_stuff_off; - - csum_start_off = (u32) (skb->h.raw - skb->data); - csum_stuff_off = csum_start_off + skb->csum_offset; + const u32 csum_start_off = skb_transport_offset(skb); + const u32 csum_stuff_off = csum_start_off + skb->csum_offset; tx_flags = (TXFLAG_OWN | TXFLAG_CSENABLE | ((csum_start_off << 14) & TXFLAG_CSBUFBEGIN) | @@ -2704,7 +2698,7 @@ static int __devinit happy_meal_sbus_probe_one(struct sbus_dev *sdev, int is_qfe dev->dev_addr[i] = macaddr[i]; macaddr[5]++; } else { - unsigned char *addr; + const unsigned char *addr; int len; addr = of_get_property(dp, "local-mac-address", &len); @@ -2986,7 +2980,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev, { struct quattro *qp = NULL; #ifdef CONFIG_SPARC - struct pcidev_cookie *pcp; + struct device_node *dp; #endif struct happy_meal *hp; struct net_device *dev; @@ -2998,13 +2992,8 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev, /* Now make sure pci_dev cookie is there. 
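sungem above, sunhme here and tg3 further down all drop the sparc64-only pcidev_cookie (asm/pbm.h) and fetch the OF device node through the generic API, which is what lets the guards widen from __sparc__/CONFIG_SPARC64 to CONFIG_SPARC. A consolidated sketch of the MAC-address lookup these hunks converge on (the fallback differs per driver: sungem falls back to the IDPROM, tg3 to its own NVRAM path):

#ifdef CONFIG_SPARC
	struct device_node *dp = pci_device_to_OF_node(pdev);
	const unsigned char *addr;
	int len;

	addr = of_get_property(dp, "local-mac-address", &len);
	if (addr && len == 6)
		memcpy(dev->dev_addr, addr, 6);
	else
		memcpy(dev->dev_addr, idprom->id_ethaddr, 6);	/* sungem-style fallback */
#endif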
*/ #ifdef CONFIG_SPARC - pcp = pdev->sysdata; - if (pcp == NULL) { - printk(KERN_ERR "happymeal(PCI): Some PCI device info missing\n"); - return -ENODEV; - } - - strcpy(prom_name, pcp->prom_node->name); + dp = pci_device_to_OF_node(pdev); + strcpy(prom_name, dp->name); #else if (is_quattro_p(pdev)) strcpy(prom_name, "SUNW,qfe"); @@ -3081,11 +3070,11 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev, macaddr[5]++; } else { #ifdef CONFIG_SPARC - unsigned char *addr; + const unsigned char *addr; int len; if (qfe_slot != -1 && - (addr = of_get_property(pcp->prom_node, + (addr = of_get_property(dp, "local-mac-address", &len)) != NULL && len == 6) { memcpy(dev->dev_addr, addr, 6); @@ -3105,7 +3094,7 @@ static int __devinit happy_meal_pci_probe(struct pci_dev *pdev, hp->tcvregs = (hpreg_base + 0x7000UL); #ifdef CONFIG_SPARC - hp->hm_revision = of_getintprop_default(pcp->prom_node, "hm-rev", 0xff); + hp->hm_revision = of_getintprop_default(dp, "hm-rev", 0xff); if (hp->hm_revision == 0xff) { unsigned char prev; @@ -3300,7 +3289,7 @@ static int __devinit hme_sbus_probe(struct of_device *dev, const struct of_devic { struct sbus_dev *sdev = to_sbus_device(&dev->dev); struct device_node *dp = dev->node; - char *model = of_get_property(dp, "model", NULL); + const char *model = of_get_property(dp, "model", NULL); int is_qfe = (match->data != NULL); if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe")) diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c index b0929a457b6..42722530ab2 100644 --- a/drivers/net/sunlance.c +++ b/drivers/net/sunlance.c @@ -547,7 +547,6 @@ static void lance_rx_dvma(struct net_device *dev) lp->stats.rx_bytes += len; - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ eth_copy_and_sum(skb, @@ -721,7 +720,6 @@ static void lance_rx_pio(struct net_device *dev) lp->stats.rx_bytes += len; - skb->dev = dev; skb_reserve (skb, 2); /* 16 byte align */ skb_put(skb, len); /* make room */ lance_piocopy_to_skb(skb, &(ib->rx_buf[entry][0]), len); @@ -1145,7 +1143,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) struct lance_init_block *ib = lp->init_block_mem; ib->btx_ring [entry].length = (-len) | 0xf000; ib->btx_ring [entry].misc = 0; - memcpy((char *)&ib->tx_buf [entry][0], skb->data, skblen); + skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen); if (len != skblen) memset((char *) &ib->tx_buf [entry][skblen], 0, len - skblen); ib->btx_ring [entry].tmd1_bits = (LE_T1_POK | LE_T1_OWN); diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c index f3bad56d476..fa70e0b78af 100644 --- a/drivers/net/sunqe.c +++ b/drivers/net/sunqe.c @@ -437,7 +437,6 @@ static void qe_rx(struct sunqe *qep) drops++; qep->net_stats.rx_dropped++; } else { - skb->dev = qep->dev; skb_reserve(skb, 2); skb_put(skb, len); eth_copy_and_sum(skb, (unsigned char *) this_qbuf, @@ -593,7 +592,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Avoid a race... 
*/ qep->qe_block->qe_txd[entry].tx_flags = TXD_UPDATE; - memcpy(txbuf, skb->data, len); + skb_copy_from_linear_data(skb, txbuf, len); qep->qe_block->qe_txd[entry].tx_addr = txbuf_dvma; qep->qe_block->qe_txd[entry].tx_flags = diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c index e3a7e3ceab7..d7741e23f8d 100644 --- a/drivers/net/tc35815.c +++ b/drivers/net/tc35815.c @@ -1145,7 +1145,6 @@ tc35815_rx(struct net_device *dev) break; } skb_reserve(skb, 2); /* 16 bit alignment */ - skb->dev = dev; data = skb_put(skb, pkt_len); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 256969e1300..9488f49ea56 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -40,16 +40,16 @@ #include <linux/dma-mapping.h> #include <net/checksum.h> +#include <net/ip.h> #include <asm/system.h> #include <asm/io.h> #include <asm/byteorder.h> #include <asm/uaccess.h> -#ifdef CONFIG_SPARC64 +#ifdef CONFIG_SPARC #include <asm/idprom.h> -#include <asm/oplib.h> -#include <asm/pbm.h> +#include <asm/prom.h> #endif #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) @@ -3349,7 +3349,7 @@ static int tg3_rx(struct tg3 *tp, int budget) skb_reserve(copy_skb, 2); skb_put(copy_skb, len); pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); - memcpy(copy_skb->data, skb->data, len); + skb_copy_from_linear_data(skb, copy_skb->data, len); pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); /* We'll reuse the original ring buffer. */ @@ -3908,20 +3908,20 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) mss |= (skb_headlen(skb) - ETH_HLEN) << 9; else { - tcp_opt_len = ((skb->h.th->doff - 5) * 4); - ip_tcp_len = (skb->nh.iph->ihl * 4) + - sizeof(struct tcphdr); + struct iphdr *iph = ip_hdr(skb); + + tcp_opt_len = tcp_optlen(skb); + ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr); - skb->nh.iph->check = 0; - skb->nh.iph->tot_len = htons(mss + ip_tcp_len + - tcp_opt_len); + iph->check = 0; + iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len); mss |= (ip_tcp_len + tcp_opt_len) << 9; } base_flags |= (TXD_FLAG_CPU_PRE_DMA | TXD_FLAG_CPU_POST_DMA); - skb->h.th->check = 0; + tcp_hdr(skb)->check = 0; } else if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -4055,6 +4055,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) mss = 0; if (skb->len > (tp->dev->mtu + ETH_HLEN) && (mss = skb_shinfo(skb)->gso_size) != 0) { + struct iphdr *iph; int tcp_opt_len, ip_tcp_len, hdr_len; if (skb_header_cloned(skb) && @@ -4063,8 +4064,8 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) goto out_unlock; } - tcp_opt_len = ((skb->h.th->doff - 5) * 4); - ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr); + tcp_opt_len = tcp_optlen(skb); + ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr); hdr_len = ip_tcp_len + tcp_opt_len; if (unlikely((ETH_HLEN + hdr_len) > 80) && @@ -4074,34 +4075,31 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) base_flags |= (TXD_FLAG_CPU_PRE_DMA | TXD_FLAG_CPU_POST_DMA); - skb->nh.iph->check = 0; - skb->nh.iph->tot_len = htons(mss + hdr_len); + iph = ip_hdr(skb); + iph->check = 0; + iph->tot_len = htons(mss + hdr_len); if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) { - skb->h.th->check = 0; + tcp_hdr(skb)->check = 0; base_flags &= ~TXD_FLAG_TCPUDP_CSUM; - } - else { - skb->h.th->check = - ~csum_tcpudp_magic(skb->nh.iph->saddr, - skb->nh.iph->daddr, - 0, IPPROTO_TCP, 0); - } + } else + 
tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) || (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) { - if (tcp_opt_len || skb->nh.iph->ihl > 5) { + if (tcp_opt_len || iph->ihl > 5) { int tsflags; - tsflags = ((skb->nh.iph->ihl - 5) + - (tcp_opt_len >> 2)); + tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2); mss |= (tsflags << 11); } } else { - if (tcp_opt_len || skb->nh.iph->ihl > 5) { + if (tcp_opt_len || iph->ihl > 5) { int tsflags; - tsflags = ((skb->nh.iph->ihl - 5) + - (tcp_opt_len >> 2)); + tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2); base_flags |= tsflags << 12; } } @@ -10988,24 +10986,20 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) return err; } -#ifdef CONFIG_SPARC64 +#ifdef CONFIG_SPARC static int __devinit tg3_get_macaddr_sparc(struct tg3 *tp) { struct net_device *dev = tp->dev; struct pci_dev *pdev = tp->pdev; - struct pcidev_cookie *pcp = pdev->sysdata; - - if (pcp != NULL) { - unsigned char *addr; - int len; - - addr = of_get_property(pcp->prom_node, "local-mac-address", - &len); - if (addr && len == 6) { - memcpy(dev->dev_addr, addr, 6); - memcpy(dev->perm_addr, dev->dev_addr, 6); - return 0; - } + struct device_node *dp = pci_device_to_OF_node(pdev); + const unsigned char *addr; + int len; + + addr = of_get_property(dp, "local-mac-address", &len); + if (addr && len == 6) { + memcpy(dev->dev_addr, addr, 6); + memcpy(dev->perm_addr, dev->dev_addr, 6); + return 0; } return -ENODEV; } @@ -11026,7 +11020,7 @@ static int __devinit tg3_get_device_address(struct tg3 *tp) u32 hi, lo, mac_offset; int addr_ok = 0; -#ifdef CONFIG_SPARC64 +#ifdef CONFIG_SPARC if (!tg3_get_macaddr_sparc(tp)) return 0; #endif diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c index f85f0025112..106dc1ef0ac 100644 --- a/drivers/net/tlan.c +++ b/drivers/net/tlan.c @@ -1112,7 +1112,7 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) if ( bbuf ) { tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE ); - memcpy( tail_buffer, skb->data, skb->len ); + skb_copy_from_linear_data(skb, tail_buffer, skb->len); } else { tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE); TLan_StoreSKB(tail_list, skb); @@ -1577,7 +1577,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) printk(KERN_INFO "TLAN: Couldn't allocate memory for received data.\n"); else { head_buffer = priv->rxBuffer + (priv->rxHead * TLAN_MAX_FRAME_SIZE); - skb->dev = dev; skb_reserve(skb, 2); t = (void *) skb_put(skb, frameSize); @@ -1608,7 +1607,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) skb->protocol = eth_type_trans( skb, dev ); netif_rx( skb ); - new_skb->dev = dev; skb_reserve( new_skb, 2 ); t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE ); head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c index 7580bdeacad..e22a3f5333e 100644 --- a/drivers/net/tokenring/3c359.c +++ b/drivers/net/tokenring/3c359.c @@ -933,20 +933,21 @@ static void xl_rx(struct net_device *dev) return ; } - skb->dev = dev ; - while (xl_priv->rx_ring_tail != temp_ring_loc) { copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; frame_length -= copy_len ; pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; - 
memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, copy_len) ; + skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail], + skb_put(skb, copy_len), + copy_len); pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; adv_rx_ring(dev) ; } /* Now we have found the last fragment */ pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; - memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, frame_length) ; + skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail], + skb_put(skb,copy_len), frame_length); /* memcpy(skb_put(skb,frame_length), bus_to_virt(xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr), frame_length) ; */ pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; adv_rx_ring(dev) ; @@ -967,8 +968,6 @@ static void xl_rx(struct net_device *dev) return ; } - skb->dev = dev ; - skb2 = xl_priv->rx_ring_skb[xl_priv->rx_ring_tail] ; pci_unmap_single(xl_priv->pdev, xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr, xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; skb_put(skb2, frame_length) ; diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c index 01d55315ee8..1e8958ee2d0 100644 --- a/drivers/net/tokenring/ibmtr.c +++ b/drivers/net/tokenring/ibmtr.c @@ -1771,7 +1771,6 @@ static void tr_rx(struct net_device *dev) /*BMS again, if she comes in with few but leaves with many */ skb_reserve(skb, sizeof(struct trh_hdr) - lan_hdr_len); skb_put(skb, length); - skb->dev = dev; data = skb->data; rbuffer_len = ntohs(readw(rbuf + offsetof(struct rec_buf, buf_len))); rbufdata = rbuf + offsetof(struct rec_buf, data); diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c index e999feb8c0b..5d849c089a3 100644 --- a/drivers/net/tokenring/lanstreamer.c +++ b/drivers/net/tokenring/lanstreamer.c @@ -944,8 +944,6 @@ static void streamer_rx(struct net_device *dev) printk(KERN_WARNING "%s: Not enough memory to copy packet to upper layers. \n", dev->name); streamer_priv->streamer_stats.rx_dropped++; } else { /* we allocated an skb OK */ - skb->dev = dev; - if (buffer_cnt == 1) { /* release the DMA mapping */ pci_unmap_single(streamer_priv->pci_dev, @@ -1607,10 +1605,11 @@ static void streamer_arb_cmd(struct net_device *dev) frame_data, buffer_len); } while (next_ptr && (buff_off = next_ptr)); + mac_frame->protocol = tr_type_trans(mac_frame, dev); #if STREAMER_NETWORK_MONITOR printk(KERN_WARNING "%s: Received MAC Frame, details: \n", dev->name); - mac_hdr = (struct trh_hdr *) mac_frame->data; + mac_hdr = tr_hdr(mac_frame); printk(KERN_WARNING "%s: MAC Frame Dest. 
Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name, mac_hdr->daddr[0], mac_hdr->daddr[1], @@ -1622,8 +1621,6 @@ static void streamer_arb_cmd(struct net_device *dev) mac_hdr->saddr[2], mac_hdr->saddr[3], mac_hdr->saddr[4], mac_hdr->saddr[5]); #endif - mac_frame->dev = dev; - mac_frame->protocol = tr_type_trans(mac_frame, dev); netif_rx(mac_frame); /* Now tell the card we have dealt with the received frame */ diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c index 8f4ecc1109c..09b3cfb8e80 100644 --- a/drivers/net/tokenring/olympic.c +++ b/drivers/net/tokenring/olympic.c @@ -814,8 +814,6 @@ static void olympic_rx(struct net_device *dev) olympic_priv->rx_ring_last_received += i ; olympic_priv->rx_ring_last_received &= (OLYMPIC_RX_RING_SIZE -1) ; } else { - skb->dev = dev ; - /* Optimise based upon number of buffers used. If only one buffer is used we can simply swap the buffers around. If more than one then we must use the new buffer and copy the information @@ -847,7 +845,9 @@ static void olympic_rx(struct net_device *dev) pci_dma_sync_single_for_cpu(olympic_priv->pdev, le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer), olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; - memcpy(skb_put(skb,length-4),olympic_priv->rx_ring_skb[rx_ring_last_received]->data,length-4) ; + skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received], + skb_put(skb,length - 4), + length - 4); pci_dma_sync_single_for_device(olympic_priv->pdev, le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer), olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; @@ -864,7 +864,9 @@ static void olympic_rx(struct net_device *dev) olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; rx_desc = &(olympic_priv->olympic_rx_ring[rx_ring_last_received]); cpy_length = (i == 1 ? frag_len : le32_to_cpu(rx_desc->res_length)); - memcpy(skb_put(skb, cpy_length), olympic_priv->rx_ring_skb[rx_ring_last_received]->data, cpy_length) ; + skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received], + skb_put(skb, cpy_length), + cpy_length); pci_dma_sync_single_for_device(olympic_priv->pdev, le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer), olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; @@ -1440,16 +1442,16 @@ static void olympic_arb_cmd(struct net_device *dev) next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next)); } while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr))); + mac_frame->protocol = tr_type_trans(mac_frame, dev); + if (olympic_priv->olympic_network_monitor) { struct trh_hdr *mac_hdr ; printk(KERN_WARNING "%s: Received MAC Frame, details: \n",dev->name) ; - mac_hdr = (struct trh_hdr *)mac_frame->data ; + mac_hdr = tr_hdr(mac_frame); printk(KERN_WARNING "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->daddr[0], mac_hdr->daddr[1], mac_hdr->daddr[2], mac_hdr->daddr[3], mac_hdr->daddr[4], mac_hdr->daddr[5]) ; printk(KERN_WARNING "%s: MAC Frame Srce. 
Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->saddr[0], mac_hdr->saddr[1], mac_hdr->saddr[2], mac_hdr->saddr[3], mac_hdr->saddr[4], mac_hdr->saddr[5]) ; } - mac_frame->dev = dev ; - mac_frame->protocol = tr_type_trans(mac_frame,dev); - netif_rx(mac_frame) ; + netif_rx(mac_frame); dev->last_rx = jiffies; drop_frame: diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c index cec282a6f62..9bbea5c8acf 100644 --- a/drivers/net/tokenring/smctr.c +++ b/drivers/net/tokenring/smctr.c @@ -3889,14 +3889,13 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size, /* Slide data into a sleek skb. */ skb_put(skb, skb->len); - memcpy(skb->data, rmf, skb->len); + skb_copy_to_linear_data(skb, rmf, skb->len); /* Update Counters */ tp->MacStat.rx_packets++; tp->MacStat.rx_bytes += skb->len; /* Kick the packet on up. */ - skb->dev = dev; skb->protocol = tr_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; @@ -4476,14 +4475,13 @@ static int smctr_rx_frame(struct net_device *dev) if (skb) { skb_put(skb, rx_size); - memcpy(skb->data, pbuff, rx_size); + skb_copy_to_linear_data(skb, pbuff, rx_size); /* Update Counters */ tp->MacStat.rx_packets++; tp->MacStat.rx_bytes += skb->len; /* Kick the packet on up. */ - skb->dev = dev; skb->protocol = tr_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c index ea797ca2b98..12bd294045a 100644 --- a/drivers/net/tokenring/tms380tr.c +++ b/drivers/net/tokenring/tms380tr.c @@ -644,7 +644,7 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device dmabuf = 0; i = tp->TplFree->TPLIndex; buf = tp->LocalTxBuffers[i]; - memcpy(buf, skb->data, length); + skb_copy_from_linear_data(skb, buf, length); newbuf = ((char *)buf - (char *)tp) + tp->dmabuffer; } else { @@ -2168,7 +2168,6 @@ static void tms380tr_rcv_status_irq(struct net_device *dev) } else { - skb->dev = dev; skb_put(skb, tp->MaxPacketSize); rpl->SkbStat = SKB_DATA_COPY; ReceiveDataPtr = rpl->MData; @@ -2179,7 +2178,8 @@ static void tms380tr_rcv_status_irq(struct net_device *dev) || rpl->SkbStat == SKB_DMA_DIRECT)) { if(rpl->SkbStat == SKB_DATA_COPY) - memcpy(skb->data, ReceiveDataPtr, Length); + skb_copy_to_linear_data(skb, ReceiveDataPtr, + Length); /* Deliver frame to system */ rpl->Skb = NULL; diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c index d92c5c597e1..0bfc2c9c1c0 100644 --- a/drivers/net/tsi108_eth.c +++ b/drivers/net/tsi108_eth.c @@ -788,7 +788,6 @@ static int tsi108_complete_rx(struct net_device *dev, int budget) printk(".\n"); } - skb->dev = dev; skb_put(skb, data->rxring[rx].len); skb->protocol = eth_type_trans(skb, dev); netif_receive_skb(skb); diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index c82befa209a..861729806dc 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -63,7 +63,7 @@ MODULE_PARM_DESC (debug, "de2104x bitmapped message enable number"); /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. 
*/ #if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \ - || defined(__sparc__) || defined(__ia64__) \ + || defined(CONFIG_SPARC) || defined(__ia64__) \ || defined(__sh__) || defined(__mips__) static int rx_copybreak = 1518; #else @@ -435,7 +435,6 @@ static void de_rx (struct de_private *de) rx_work = 100; goto rx_next; } - copy_skb->dev = de->dev; if (!copying_skb) { pci_unmap_single(de->pdev, mapping, @@ -450,8 +449,8 @@ static void de_rx (struct de_private *de) } else { pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE); skb_reserve(copy_skb, RX_OFFSET); - memcpy(skb_put(copy_skb, len), skb->data, len); - + skb_copy_from_linear_data(skb, skb_put(copy_skb, len), + len); pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE); /* We'll reuse the original ring buffer. */ diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 4b3cd3d8b62..62143f92c23 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1160,7 +1160,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) sprintf(lp->adapter_name,"%s (%s)", name, gendev->bus_id); lp->dma_size = (NUM_RX_DESC + NUM_TX_DESC) * sizeof(struct de4x5_desc); -#if defined(__alpha__) || defined(__powerpc__) || defined(__sparc_v9__) || defined(DE4X5_DO_MEMCPY) +#if defined(__alpha__) || defined(__powerpc__) || defined(CONFIG_SPARC) || defined(DE4X5_DO_MEMCPY) lp->dma_size += RX_BUFF_SZ * NUM_RX_DESC + DE4X5_ALIGN; #endif lp->rx_ring = dma_alloc_coherent(gendev, lp->dma_size, @@ -1175,7 +1175,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) ** Set up the RX descriptor ring (Intels) ** Allocate contiguous receive buffers, long word aligned (Alphas) */ -#if !defined(__alpha__) && !defined(__powerpc__) && !defined(__sparc_v9__) && !defined(DE4X5_DO_MEMCPY) +#if !defined(__alpha__) && !defined(__powerpc__) && !defined(CONFIG_SPARC) && !defined(DE4X5_DO_MEMCPY) for (i=0; i<NUM_RX_DESC; i++) { lp->rx_ring[i].status = 0; lp->rx_ring[i].des1 = cpu_to_le32(RX_BUFF_SZ); @@ -1252,11 +1252,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) mii_get_phy(dev); } -#ifndef __sparc_v9__ printk(" and requires IRQ%d (provided by %s).\n", dev->irq, -#else - printk(" and requires IRQ%x (provided by %s).\n", dev->irq, -#endif ((lp->bus == PCI) ? 
"PCI BIOS" : "EISA CNFG")); } @@ -3627,14 +3623,13 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len) struct de4x5_private *lp = netdev_priv(dev); struct sk_buff *p; -#if !defined(__alpha__) && !defined(__powerpc__) && !defined(__sparc_v9__) && !defined(DE4X5_DO_MEMCPY) +#if !defined(__alpha__) && !defined(__powerpc__) && !defined(CONFIG_SPARC) && !defined(DE4X5_DO_MEMCPY) struct sk_buff *ret; u_long i=0, tmp; p = dev_alloc_skb(IEEE802_3_SZ + DE4X5_ALIGN + 2); if (!p) return NULL; - p->dev = dev; tmp = virt_to_bus(p->data); i = ((tmp + DE4X5_ALIGN) & ~DE4X5_ALIGN) - tmp; skb_reserve(p, i); @@ -3655,7 +3650,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len) p = dev_alloc_skb(len + 2); if (!p) return NULL; - p->dev = dev; skb_reserve(p, 2); /* Align */ if (index < lp->rx_old) { /* Wrapped buffer */ short tlen = (lp->rxRingSize - lp->rx_old) * RX_BUFF_SZ; diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c index 9aeac76184f..b3a64ca9863 100644 --- a/drivers/net/tulip/dmfe.c +++ b/drivers/net/tulip/dmfe.c @@ -682,7 +682,7 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev) /* transmit this packet */ txptr = db->tx_insert_ptr; - memcpy(txptr->tx_buf_ptr, skb->data, skb->len); + skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len); txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len); /* Point to next transmit free descriptor */ @@ -988,14 +988,14 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db) skb = newskb; /* size less than COPY_SIZE, allocate a rxlen SKB */ - skb->dev = dev; skb_reserve(skb, 2); /* 16byte align */ - memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen); + skb_copy_from_linear_data(rxptr->rx_skb_ptr, + skb_put(skb, rxlen), + rxlen); dmfe_reuse_skb(db, rxptr->rx_skb_ptr); - } else { - skb->dev = dev; + } else skb_put(skb, rxlen); - } + skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c index e3488d7b8ed..e86df07769a 100644 --- a/drivers/net/tulip/interrupt.c +++ b/drivers/net/tulip/interrupt.c @@ -192,7 +192,6 @@ int tulip_poll(struct net_device *dev, int *budget) to a minimally-sized skbuff. */ if (pkt_len < tulip_rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(tp->pdev, tp->rx_buffers[entry].mapping, @@ -416,7 +415,6 @@ static int tulip_rx(struct net_device *dev) to a minimally-sized skbuff. */ if (pkt_len < tulip_rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(tp->pdev, tp->rx_buffers[entry].mapping, diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index e3774a52237..e9bf526ec53 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -36,8 +36,8 @@ #include <asm/unaligned.h> #include <asm/uaccess.h> -#ifdef __sparc__ -#include <asm/pbm.h> +#ifdef CONFIG_SPARC +#include <asm/prom.h> #endif static char version[] __devinitdata = @@ -67,7 +67,7 @@ const char * const medianame[32] = { /* Set the copy breakpoint for the copy-only-tiny-buffer Rx structure. 
*/ #if defined(__alpha__) || defined(__arm__) || defined(__hppa__) \ - || defined(__sparc__) || defined(__ia64__) \ + || defined(CONFIG_SPARC) || defined(__ia64__) \ || defined(__sh__) || defined(__mips__) static int rx_copybreak = 1518; #else @@ -91,7 +91,7 @@ static int rx_copybreak = 100; static int csr0 = 0x01A00000 | 0xE000; #elif defined(__i386__) || defined(__powerpc__) || defined(__x86_64__) static int csr0 = 0x01A00000 | 0x8000; -#elif defined(__sparc__) || defined(__hppa__) +#elif defined(CONFIG_SPARC) || defined(__hppa__) /* The UltraSparc PCI controllers will disconnect at every 64-byte * crossing anyways so it makes no sense to tell Tulip to burst * any more than that. @@ -1315,7 +1315,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev, /* DM9102A has troubles with MRM & clear reserved bits 24:22, 20, 16, 7:1 */ if (tulip_uli_dm_quirk(pdev)) { csr0 &= ~0x01f100ff; -#if defined(__sparc__) +#if defined(CONFIG_SPARC) csr0 = (csr0 & ~0xff00) | 0xe000; #endif } @@ -1535,23 +1535,19 @@ static int __devinit tulip_init_one (struct pci_dev *pdev, Many PCI BIOSes also incorrectly report the IRQ line, so we correct that here as well. */ if (sum == 0 || sum == 6*0xff) { -#if defined(__sparc__) - struct pcidev_cookie *pcp = pdev->sysdata; +#if defined(CONFIG_SPARC) + struct device_node *dp = pci_device_to_OF_node(pdev); + const unsigned char *addr; + int len; #endif eeprom_missing = 1; for (i = 0; i < 5; i++) dev->dev_addr[i] = last_phys_addr[i]; dev->dev_addr[i] = last_phys_addr[i] + 1; -#if defined(__sparc__) - if (pcp) { - unsigned char *addr; - int len; - - addr = of_get_property(pcp->prom_node, - "local-mac-address", &len); - if (addr && len == 6) - memcpy(dev->dev_addr, addr, 6); - } +#if defined(CONFIG_SPARC) + addr = of_get_property(dp, "local-mac-address", &len); + if (addr && len == 6) + memcpy(dev->dev_addr, addr, 6); #endif #if defined(__i386__) || defined(__x86_64__) /* Patch up x86 BIOS bug. 
*/ if (last_irq) diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c index 229158e8e4b..ca2548eb7d6 100644 --- a/drivers/net/tulip/uli526x.c +++ b/drivers/net/tulip/uli526x.c @@ -583,7 +583,7 @@ static int uli526x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* transmit this packet */ txptr = db->tx_insert_ptr; - memcpy(txptr->tx_buf_ptr, skb->data, skb->len); + skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len); txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len); /* Point to next transmit free descriptor */ @@ -828,14 +828,14 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info ( (skb = dev_alloc_skb(rxlen + 2) ) != NULL) ) { /* size less than COPY_SIZE, allocate a rxlen SKB */ - skb->dev = dev; skb_reserve(skb, 2); /* 16byte align */ - memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen); + memcpy(skb_put(skb, rxlen), + skb_tail_pointer(rxptr->rx_skb_ptr), + rxlen); uli526x_reuse_skb(db, rxptr->rx_skb_ptr); - } else { - skb->dev = dev; + } else skb_put(skb, rxlen); - } + skb->protocol = eth_type_trans(skb, dev); netif_rx(skb); dev->last_rx = jiffies; @@ -1177,7 +1177,10 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) { rxptr->rx_skb_ptr = skb; - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, + skb_tail_pointer(skb), + RX_ALLOC_SIZE, + PCI_DMA_FROMDEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); db->rx_avail_cnt++; @@ -1341,7 +1344,10 @@ static void allocate_rx_buffer(struct uli526x_board_info *db) if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL ) break; rxptr->rx_skb_ptr = skb; /* FIXME (?) */ - rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) ); + rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev, + skb_tail_pointer(skb), + RX_ALLOC_SIZE, + PCI_DMA_FROMDEVICE)); wmb(); rxptr->rdes0 = cpu_to_le32(0x80000000); rxptr = rxptr->next_rx_desc; diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c index 002a05e0722..5b71ac78bca 100644 --- a/drivers/net/tulip/winbond-840.c +++ b/drivers/net/tulip/winbond-840.c @@ -813,7 +813,6 @@ static void init_rxtx_rings(struct net_device *dev) np->rx_skbuff[i] = skb; if (skb == NULL) break; - skb->dev = dev; /* Mark as being used by this device. */ np->rx_addr[i] = pci_map_single(np->pci_dev,skb->data, np->rx_buf_sz,PCI_DMA_FROMDEVICE); @@ -903,7 +902,7 @@ static void init_registers(struct net_device *dev) } #elif defined(__powerpc__) || defined(__i386__) || defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) i |= 0xE000; -#elif defined(__sparc__) || defined (CONFIG_PARISC) +#elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) i |= 0x4800; #else #warning Processor architecture undefined @@ -1229,7 +1228,6 @@ static int netdev_rx(struct net_device *dev) to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry], np->rx_skbuff[entry]->len, @@ -1278,7 +1276,6 @@ static int netdev_rx(struct net_device *dev) np->rx_skbuff[entry] = skb; if (skb == NULL) break; /* Better luck next round. */ - skb->dev = dev; /* Mark as being used by this device. 
*/ np->rx_addr[entry] = pci_map_single(np->pci_dev, skb->data, np->rx_buf_sz, PCI_DMA_FROMDEVICE); diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c index 61d313049dd..985a1810ca5 100644 --- a/drivers/net/tulip/xircom_cb.c +++ b/drivers/net/tulip/xircom_cb.c @@ -411,9 +411,9 @@ static int xircom_start_xmit(struct sk_buff *skb, struct net_device *dev) sometimes sends more than you ask it to. */ memset(&card->tx_buffer[bufferoffsets[desc]/4],0,1536); - memcpy(&(card->tx_buffer[bufferoffsets[desc]/4]),skb->data,skb->len); - - + skb_copy_from_linear_data(skb, + &(card->tx_buffer[bufferoffsets[desc] / 4]), + skb->len); /* FIXME: The specification tells us that the length we send HAS to be a multiple of 4 bytes. */ @@ -1207,7 +1207,6 @@ static void investigate_read_descriptor(struct net_device *dev,struct xircom_pri card->stats.rx_dropped++; goto out; } - skb->dev = dev; skb_reserve(skb, 2); eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0); skb_put(skb, pkt_len); diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c index a998c5d0ae9..f6417292737 100644 --- a/drivers/net/tulip/xircom_tulip_cb.c +++ b/drivers/net/tulip/xircom_tulip_cb.c @@ -65,7 +65,7 @@ static int rx_copybreak = 100; static int csr0 = 0x01A00000 | 0xE000; #elif defined(__powerpc__) static int csr0 = 0x01B00000 | 0x8000; -#elif defined(__sparc__) +#elif defined(CONFIG_SPARC) static int csr0 = 0x01B00080 | 0x8000; #elif defined(__i386__) static int csr0 = 0x01A00000 | 0x8000; @@ -915,7 +915,9 @@ xircom_start_xmit(struct sk_buff *skb, struct net_device *dev) tp->tx_skbuff[entry] = skb; if (tp->chip_id == X3201_3) { - memcpy(tp->tx_aligned_skbuff[entry]->data,skb->data,skb->len); + skb_copy_from_linear_data(skb, + tp->tx_aligned_skbuff[entry]->data, + skb->len); tp->tx_ring[entry].buffer1 = virt_to_bus(tp->tx_aligned_skbuff[entry]->data); } else tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data); @@ -1238,7 +1240,6 @@ xircom_rx(struct net_device *dev) to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ #if ! defined(__alpha__) eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1), diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 5643d1e84ed..a2c6caaaae9 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -18,6 +18,10 @@ /* * Changes: * + * Brian Braunstein <linuxkernel@bristyle.com> 2007/03/23 + * Fixed hw address handling. Now net_device.dev_addr is kept consistent + * with tun.dev_addr when the address is set by this module. 
+ * * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14 * Add TUNSETLINK ioctl to set the link encapsulation * @@ -196,7 +200,10 @@ static void tun_net_init(struct net_device *dev) dev->set_multicast_list = tun_net_mclist; ether_setup(dev); - random_ether_addr(dev->dev_addr); + + /* random address already created for us by tun_set_iff, use it */ + memcpy(dev->dev_addr, tun->dev_addr, min(sizeof(tun->dev_addr), sizeof(dev->dev_addr)) ); + dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; } @@ -254,11 +261,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, return -EFAULT; } - skb->dev = tun->dev; switch (tun->flags & TUN_TYPE_MASK) { case TUN_TUN_DEV: - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->protocol = pi.proto; + skb->dev = tun->dev; break; case TUN_TAP_DEV: skb->protocol = eth_type_trans(skb, tun->dev); @@ -386,8 +393,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, * - we are multicast promiscous. * - we belong to the multicast group. */ - memcpy(addr, skb->data, - min_t(size_t, sizeof addr, skb->len)); + skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr, + skb->len)); bit_nr = ether_crc(sizeof addr, addr) >> 26; if ((tun->if_flags & IFF_PROMISC) || memcmp(addr, tun->dev_addr, sizeof addr) == 0 || @@ -636,6 +643,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, return 0; case SIOCGIFHWADDR: + /* Note: the actual net device's address may be different */ memcpy(ifr.ifr_hwaddr.sa_data, tun->dev_addr, min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr)); if (copy_to_user( argp, &ifr, sizeof ifr)) @@ -643,16 +651,24 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, return 0; case SIOCSIFHWADDR: - /** Set the character device's hardware address. This is used when - * filtering packets being sent from the network device to the character - * device. */ - memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data, - min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr)); - DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n", - tun->dev->name, - tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2], - tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]); - return 0; + { + /* try to set the actual net device's hw address */ + int ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); + + if (ret == 0) { + /** Set the character device's hardware address. This is used when + * filtering packets being sent from the network device to the character + * device. 
*/ + memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data, + min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr)); + DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n", + tun->dev->name, + tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2], + tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]); + } + + return ret; + } case SIOCADDMULTI: /** Add the specified group to the character device's multicast filter diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 0d91d094edd..f2dd7763cd0 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -1708,7 +1708,6 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile u32 * ready, if(pkt_len < rx_copybreak && (new_skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - new_skb->dev = tp->dev; skb_reserve(new_skb, 2); pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, PKT_BUF_SZ, diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index f3a972e74e9..adea290a9d5 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -1486,7 +1486,6 @@ static int rhine_rx(struct net_device *dev, int limit) copying to a minimally-sized skbuff. */ if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ pci_dma_sync_single_for_cpu(rp->pdev, rp->rx_skbuff_dma[entry], diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 8e5d82051bd..25b75b61518 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1339,7 +1339,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size, if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) skb_reserve(new_skb, 2); - memcpy(new_skb->data, rx_skb[0]->data, pkt_size); + skb_copy_from_linear_data(rx_skb[0], new_skb->data, + pkt_size); *rx_skb = new_skb; ret = 0; } @@ -1398,7 +1399,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx) vptr->stats.multicast++; skb = rd_info->skb; - skb->dev = vptr->dev; pci_dma_sync_single_for_cpu(vptr->pdev, rd_info->skb_dma, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE); @@ -1428,7 +1428,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx) PCI_DMA_FROMDEVICE); skb_put(skb, pkt_len - 4); - skb->protocol = eth_type_trans(skb, skb->dev); + skb->protocol = eth_type_trans(skb, vptr->dev); stats->rx_bytes += pkt_len; netif_rx(skb); @@ -1928,7 +1928,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) if (pktlen < ETH_ZLEN) { /* Cannot occur until ZC support */ pktlen = ETH_ZLEN; - memcpy(tdinfo->buf, skb->data, skb->len); + skb_copy_from_linear_data(skb, tdinfo->buf, skb->len); memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len); tdinfo->skb = skb; tdinfo->skb_dma[0] = tdinfo->buf_dma; @@ -1944,7 +1944,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; tdinfo->skb = skb; if (nfrags > 6) { - memcpy(tdinfo->buf, skb->data, skb->len); + skb_copy_from_linear_data(skb, tdinfo->buf, skb->len); tdinfo->skb_dma[0] = tdinfo->buf_dma; td_ptr->tdesc0.pktsize = td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]); @@ -2007,7 +2007,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM) && (skb->ip_summed == CHECKSUM_PARTIAL)) { - struct iphdr *ip = skb->nh.iph; + const struct iphdr *ip = ip_hdr(skb); if (ip->protocol == IPPROTO_TCP) td_ptr->tdesc1.TCR |= TCR0_TCPCK; else if (ip->protocol == IPPROTO_UDP) diff --git 
a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 5b82e4fd0d7..23464735fa8 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan) } chan->rx_skb->protocol = htons(ETH_P_WAN_PPP); chan->rx_skb->dev = chan->pppdev.dev; - chan->rx_skb->mac.raw = chan->rx_skb->data; + skb_reset_mac_header(chan->rx_skb); chan->stats.rx_packets++; chan->stats.rx_bytes += chan->cosa->rxsize; netif_rx(chan->rx_skb); diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c index a631d1c2fa1..016b3ff3ea5 100644 --- a/drivers/net/wan/cycx_x25.c +++ b/drivers/net/wan/cycx_x25.c @@ -834,7 +834,7 @@ static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd) ++chan->ifstats.rx_packets; chan->ifstats.rx_bytes += pktlen; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); netif_rx(skb); dev->last_rx = jiffies; /* timestamp */ } diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 73698755943..66be20c292b 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -176,7 +176,7 @@ static void dlci_receive(struct sk_buff *skb, struct net_device *dev) if (process) { /* we've set up the protocol, so discard the header */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, header); dlp->stats.rx_bytes += skb->len; netif_rx(skb); diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index 25021a7992a..dca02447145 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1904,7 +1904,8 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv) struct TxFD *tx_fd = dpriv->tx_fd + last; skb->len = DUMMY_SKB_SIZE; - memcpy(skb->data, version, strlen(version)%DUMMY_SKB_SIZE); + skb_copy_to_linear_data(skb, version, + strlen(version) % DUMMY_SKB_SIZE); tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE); tx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data, DUMMY_SKB_SIZE, PCI_DMA_TODEVICE); diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index c45d6a83339..58a53b6d9b4 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -864,7 +864,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port, static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_HOST; return htons(ETH_P_CUST); } diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index c9664fd8a91..00e0aaadabc 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -124,7 +124,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, skb_put(skb, sizeof(struct cisco_packet)); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; - skb->nh.raw = skb->data; + skb_reset_network_header(skb); dev_queue_xmit(skb); } diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index c6c3c757d6f..aeb2789adf2 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -533,7 +533,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb->protocol = __constant_htons(NLPID_CCITT_ANSI_LMI); fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI); } - data = skb->tail; + data = skb_tail_pointer(skb); data[i++] = LMI_CALLREF; data[i++] = dce ? 
LMI_STATUS : LMI_STATUS_ENQUIRY; if (lmi == LMI_ANSI) @@ -590,7 +590,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) skb_put(skb, i); skb->priority = TC_PRIO_CONTROL; skb->dev = dev; - skb->nh.raw = skb->data; + skb_reset_network_header(skb); dev_queue_xmit(skb); } @@ -1011,7 +1011,6 @@ static int fr_rx(struct sk_buff *skb) stats->rx_bytes += skb->len; if (pvc->state.becn) stats->rx_compressed++; - skb->dev = dev; netif_rx(skb); return NET_RX_SUCCESS; } else { diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index a02c5fb4056..9ba3e4ee6ec 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -59,7 +59,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb) /* Drop the CRC - it's not a good idea to try and negotiate it ;) */ skb_trim(skb, skb->len-2); skb->protocol=__constant_htons(ETH_P_WAN_PPP); - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); skb->dev=c->netdevice; /* * Send it to the PPP layer. We don't have time to process diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 2b54f1bc3a0..ae132c1c545 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1636,7 +1636,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ if (nsb) { sc->lmc_rxq[i] = nsb; nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail); + sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); } sc->failed_recv_alloc = 1; goto skip_packet; @@ -1667,8 +1667,8 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ skb_put (skb, len); skb->protocol = lmc_proto_type(sc, skb); skb->protocol = htons(ETH_P_WAN_PPP); - skb->mac.raw = skb->data; -// skb->nh.raw = skb->data; + skb_reset_mac_header(skb); + /* skb_reset_network_header(skb); */ skb->dev = dev; lmc_proto_netif(sc, skb); @@ -1679,7 +1679,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ if (nsb) { sc->lmc_rxq[i] = nsb; nsb->dev = dev; - sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail); + sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb)); /* Transferred to 21140 below */ } else { @@ -1702,11 +1702,11 @@ static int lmc_rx (struct net_device *dev) /*fold00*/ if(!nsb) { goto give_it_anyways; } - memcpy(skb_put(nsb, len), skb->data, len); + skb_copy_from_linear_data(skb, skb_put(nsb, len), len); nsb->protocol = lmc_proto_type(sc, skb); - nsb->mac.raw = nsb->data; -// nsb->nh.raw = nsb->data; + skb_reset_mac_header(nsb); + /* skb_reset_network_header(nsb); */ nsb->dev = dev; lmc_proto_netif(sc, nsb); } @@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/ sc->lmc_rxring[i].status = 0x80000000; /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */ - sc->lmc_rxring[i].length = skb->end - skb->data; + sc->lmc_rxring[i].length = skb_tailroom(skb); /* use to be tail which is dumb since you're thinking why write * to the end of the packj,et but since there's nothing there tail == data diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c index 62184dee377..999bf71937c 100644 --- a/drivers/net/wan/pc300_drv.c +++ b/drivers/net/wan/pc300_drv.c @@ -1755,17 +1755,17 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx) skb->dev = dev; skb->protocol = htons(ETH_P_CUST); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_HOST; skb->len = 10 + skb_main->len; - memcpy(skb->data, dev->name, 5); + skb_copy_to_linear_data(skb, dev->name, 5); skb->data[5] = 
'['; skb->data[6] = rx_tx; skb->data[7] = ']'; skb->data[8] = ':'; skb->data[9] = ' '; - memcpy(&skb->data[10], skb_main->data, skb_main->len); + skb_copy_from_linear_data(skb_main, &skb->data[10], skb_main->len); netif_rx(skb); } diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c index 5873c346e7e..07dbdfbfc15 100644 --- a/drivers/net/wan/pc300_tty.c +++ b/drivers/net/wan/pc300_tty.c @@ -1003,17 +1003,17 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx) skb_put (skb, 10 + len); skb->dev = dev->dev; skb->protocol = htons(ETH_P_CUST); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_HOST; skb->len = 10 + len; - memcpy(skb->data,dev->dev->name,5); + skb_copy_to_linear_data(skb, dev->dev->name, 5); skb->data[5] = '['; skb->data[6] = rxtx; skb->data[7] = ']'; skb->data[8] = ':'; skb->data[9] = ' '; - memcpy(&skb->data[10], buf, len); + skb_copy_to_linear_data_offset(skb, 10, buf, len); netif_rx(skb); } diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index fc5c0c611ff..35eded7ffb2 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -999,11 +999,6 @@ get_rx_buf( struct net_device *dev ) if( !skb ) return NULL; -#ifdef CONFIG_SBNI_MULTILINE - skb->dev = ((struct net_local *) dev->priv)->master; -#else - skb->dev = dev; -#endif skb_reserve( skb, 2 ); /* Align IP on longword boundaries */ return skb; } diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c index 70fb1b98b1d..131358108c5 100644 --- a/drivers/net/wan/sealevel.c +++ b/drivers/net/wan/sealevel.c @@ -61,7 +61,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb) /* Drop the CRC - it's not a good idea to try and negotiate it ;) */ skb_trim(skb, skb->len-2); skb->protocol=htons(ETH_P_WAN_PPP); - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); skb->dev=c->netdevice; /* * Send it to the PPP layer. 
We don't have time to process diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 218f7b574ab..67fc67cfd45 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -227,7 +227,7 @@ static void sppp_input (struct net_device *dev, struct sk_buff *skb) unsigned long flags; skb->dev=dev; - skb->mac.raw=skb->data; + skb_reset_mac_header(skb); if (dev->flags & IFF_RUNNING) { diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c index 8b4540bfc1b..98ef400908b 100644 --- a/drivers/net/wan/z85230.c +++ b/drivers/net/wan/z85230.c @@ -1656,7 +1656,7 @@ static void z8530_rx_done(struct z8530_channel *c) else { skb_put(skb, ct); - memcpy(skb->data, rxb, ct); + skb_copy_to_linear_data(skb, rxb, ct); c->stats.rx_packets++; c->stats.rx_bytes+=ct; } @@ -1782,7 +1782,7 @@ int z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb) */ c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used]; c->tx_dma_used^=1; /* Flip temp buffer */ - memcpy(c->tx_next_ptr, skb->data, skb->len); + skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len); } else c->tx_next_ptr=skb->data; diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index ece3d9c2dc6..4426841b2be 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -2,47 +2,21 @@ # Wireless LAN device configuration # -menu "Wireless LAN (non-hamradio)" - depends on NETDEVICES - -config NET_RADIO - bool "Wireless LAN drivers (non-hamradio) & Wireless Extensions" - select WIRELESS_EXT - ---help--- - Support for wireless LANs and everything having to do with radio, - but not with amateur radio or FM broadcasting. - - Saying Y here also enables the Wireless Extensions (creates - /proc/net/wireless and enables iwconfig access). The Wireless - Extension is a generic API allowing a driver to expose to the user - space configuration and statistics specific to common Wireless LANs. - The beauty of it is that a single set of tool can support all the - variations of Wireless LANs, regardless of their type (as long as - the driver supports Wireless Extension). Another advantage is that - these parameters may be changed on the fly without restarting the - driver (or Linux). If you wish to use Wireless Extensions with - wireless PCMCIA (PC-) cards, you need to say Y here; you can fetch - the tools from - <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>. +menu "Wireless LAN" -config NET_WIRELESS_RTNETLINK - bool "Wireless Extension API over RtNetlink" - depends on NET_RADIO +config WLAN_PRE80211 + bool "Wireless LAN (pre-802.11)" + depends on NETDEVICES ---help--- - Support the Wireless Extension API over the RtNetlink socket - in addition to the traditional ioctl interface (selected above). + Say Y if you have any pre-802.11 wireless LAN hardware. - For now, few tools use this facility, but it might grow in the - future. The only downside is that it adds 4.5 kB to your kernel. - -# Note : the cards are obsolete (can't buy them anymore), but the drivers -# are not, as people are still using them... -comment "Obsolete Wireless cards support (pre-802.11)" - depends on NET_RADIO && (INET || ISA || PCMCIA) + This option does not affect the kernel build, it only + lets you choose drivers. config STRIP tristate "STRIP (Metricom starmode radio IP)" - depends on NET_RADIO && INET + depends on INET && WLAN_PRE80211 + select WIRELESS_EXT ---help--- Say Y if you have a Metricom radio and intend to use Starmode Radio IP. 
STRIP is a radio protocol developed for the MosquitoNet project @@ -65,7 +39,8 @@ config STRIP config ARLAN tristate "Aironet Arlan 655 & IC2200 DS support" - depends on NET_RADIO && ISA && !64BIT + depends on ISA && !64BIT && WLAN_PRE80211 + select WIRELESS_EXT ---help--- Aironet makes Arlan, a class of wireless LAN adapters. These use the www.Telxon.com chip, which is also used on several similar cards. @@ -80,7 +55,8 @@ config ARLAN config WAVELAN tristate "AT&T/Lucent old WaveLAN & DEC RoamAbout DS ISA support" - depends on NET_RADIO && ISA + depends on ISA && WLAN_PRE80211 + select WIRELESS_EXT ---help--- The Lucent WaveLAN (formerly NCR and AT&T; or DEC RoamAbout DS) is a Radio LAN (wireless Ethernet-like Local Area Network) using the @@ -107,7 +83,8 @@ config WAVELAN config PCMCIA_WAVELAN tristate "AT&T/Lucent old WaveLAN Pcmcia wireless support" - depends on NET_RADIO && PCMCIA + depends on PCMCIA && WLAN_PRE80211 + select WIRELESS_EXT help Say Y here if you intend to attach an AT&T/Lucent Wavelan PCMCIA (PC-card) wireless Ethernet networking card to your computer. This @@ -118,7 +95,8 @@ config PCMCIA_WAVELAN config PCMCIA_NETWAVE tristate "Xircom Netwave AirSurfer Pcmcia wireless support" - depends on NET_RADIO && PCMCIA + depends on PCMCIA && WLAN_PRE80211 + select WIRELESS_EXT help Say Y here if you intend to attach this type of PCMCIA (PC-card) wireless Ethernet networking card to your computer. @@ -126,12 +104,20 @@ config PCMCIA_NETWAVE To compile this driver as a module, choose M here: the module will be called netwave_cs. If unsure, say N. -comment "Wireless 802.11 Frequency Hopping cards support" - depends on NET_RADIO && PCMCIA + +config WLAN_80211 + bool "Wireless LAN (IEEE 802.11)" + depends on NETDEVICES + ---help--- + Say Y if you have any 802.11 wireless LAN hardware. + + This option does not affect the kernel build, it only + lets you choose drivers. config PCMCIA_RAYCS tristate "Aviator/Raytheon 2.4MHz wireless support" - depends on NET_RADIO && PCMCIA + depends on PCMCIA && WLAN_80211 + select WIRELESS_EXT ---help--- Say Y here if you intend to attach an Aviator/Raytheon PCMCIA (PC-card) wireless Ethernet networking card to your computer. @@ -141,12 +127,10 @@ config PCMCIA_RAYCS To compile this driver as a module, choose M here: the module will be called ray_cs. If unsure, say N. 
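Editor's note: the driver hunks above all make the same substitution — direct assignments such as skb->mac.raw = skb->data become skb_reset_mac_header(). Below is a minimal before/after sketch, assuming the 2.6.22 sk_buff helpers shown in this patch; the receive function and device are hypothetical and not taken from any driver here.

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>

/* Hypothetical receive path sketching the conversion applied above. */
static void sketch_rx_deliver(struct net_device *dev, struct sk_buff *skb)
{
        skb->dev = dev;
        /* 2.6.21 and earlier:  skb->mac.raw = skb->data;          */
        /* 2.6.22 helper: record the header position via the API.  */
        skb_reset_mac_header(skb);
        skb->protocol = htons(ETH_P_WAN_PPP);
        netif_rx(skb);
}

Routing the assignment through a helper is what allows later kernels to store header positions as offsets (sk_buff_data_t) instead of raw pointers on 64-bit builds, without touching every driver again.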
-comment "Wireless 802.11b ISA/PCI cards support" - depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA) - config IPW2100 tristate "Intel PRO/Wireless 2100 Network Connection" - depends on NET_RADIO && PCI + depends on PCI && WLAN_80211 + select WIRELESS_EXT select FW_LOADER select IEEE80211 ---help--- @@ -200,7 +184,8 @@ config IPW2100_DEBUG config IPW2200 tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection" - depends on NET_RADIO && PCI + depends on PCI && WLAN_80211 + select WIRELESS_EXT select FW_LOADER select IEEE80211 ---help--- @@ -282,7 +267,8 @@ config IPW2200_DEBUG config AIRO tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards" - depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN) + depends on ISA_DMA_API && WLAN_80211 && (PCI || BROKEN) + select WIRELESS_EXT select CRYPTO ---help--- This is the standard Linux driver to support Cisco/Aironet ISA and @@ -299,7 +285,8 @@ config AIRO config HERMES tristate "Hermes chipset 802.11b support (Orinoco/Prism2/Symbol)" - depends on NET_RADIO && (PPC_PMAC || PCI || PCMCIA) + depends on (PPC_PMAC || PCI || PCMCIA) && WLAN_80211 + select WIRELESS_EXT ---help--- A driver for 802.11b wireless cards based on the "Hermes" or Intersil HFA384x (Prism 2) MAC controller. This includes the vast @@ -373,7 +360,8 @@ config PCI_HERMES config ATMEL tristate "Atmel at76c50x chipset 802.11b support" - depends on NET_RADIO && (PCI || PCMCIA) + depends on (PCI || PCMCIA) && WLAN_80211 + select WIRELESS_EXT select FW_LOADER select CRC32 ---help--- @@ -394,13 +382,9 @@ config PCI_ATMEL Enable support for PCI and mini-PCI cards containing the Atmel at76c506 chip. -# If Pcmcia is compiled in, offer Pcmcia cards... -comment "Wireless 802.11b Pcmcia/Cardbus cards support" - depends on NET_RADIO && PCMCIA - config PCMCIA_HERMES tristate "Hermes PCMCIA card support" - depends on NET_RADIO && PCMCIA && HERMES + depends on PCMCIA && HERMES ---help--- A driver for "Hermes" chipset based PCMCIA wireless adaptors, such as the Lucent WavelanIEEE/Orinoco cards and their OEM (Cabletron/ @@ -420,7 +404,7 @@ config PCMCIA_HERMES config PCMCIA_SPECTRUM tristate "Symbol Spectrum24 Trilogy PCMCIA card support" - depends on NET_RADIO && PCMCIA && HERMES + depends on PCMCIA && HERMES select FW_LOADER ---help--- @@ -434,7 +418,8 @@ config PCMCIA_SPECTRUM config AIRO_CS tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards" - depends on NET_RADIO && PCMCIA && (BROKEN || !M32R) + depends on PCMCIA && (BROKEN || !M32R) && WLAN_80211 + select WIRELESS_EXT select CRYPTO select CRYPTO_AES ---help--- @@ -458,7 +443,8 @@ config AIRO_CS config PCMCIA_ATMEL tristate "Atmel at76c502/at76c504 PCMCIA cards" - depends on NET_RADIO && ATMEL && PCMCIA + depends on ATMEL && PCMCIA + select WIRELESS_EXT select FW_LOADER select CRC32 ---help--- @@ -467,17 +453,17 @@ config PCMCIA_ATMEL config PCMCIA_WL3501 tristate "Planet WL3501 PCMCIA cards" - depends on NET_RADIO && EXPERIMENTAL && PCMCIA + depends on EXPERIMENTAL && PCMCIA && WLAN_80211 + select WIRELESS_EXT ---help--- A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet. It has basic support for Linux wireless extensions and initial micro support for ethtool. 
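Editor's note: likewise, the many memcpy(buf, skb->data, len) and memcpy(skb->data, buf, len) call sites become skb_copy_from_linear_data() and skb_copy_to_linear_data(). A minimal sketch under the same assumptions — the sketch_* functions and buffers are hypothetical, the helpers are the real 2.6.22 API.

#include <linux/skbuff.h>
#include <linux/string.h>

/* Hypothetical copy-to-hardware-buffer transmit path. */
static void sketch_copy_tx(struct sk_buff *skb, void *tx_buf)
{
        /* Was:  memcpy(tx_buf, skb->data, skb->len);  */
        skb_copy_from_linear_data(skb, tx_buf, skb->len);
}

/* Receive direction, filling a freshly allocated skb (data == tail). */
static void sketch_copy_rx(struct sk_buff *skb, const void *rx_buf,
                           unsigned int len)
{
        /* Was:  memcpy(skb_put(skb, len), rx_buf, len);  */
        skb_copy_to_linear_data(skb, rx_buf, len);
        skb_put(skb, len);
}

The wrappers are thin (they expand to memcpy() against skb->data), but they confine every access to the skb linear area behind one auditable interface, which is the point of this patch series.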
-comment "Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support" - depends on NET_RADIO && PCI config PRISM54 tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus' - depends on PCI && NET_RADIO && EXPERIMENTAL + depends on PCI && EXPERIMENTAL && WLAN_80211 + select WIRELESS_EXT select FW_LOADER ---help--- Enable PCI and Cardbus support for the following chipset based cards: @@ -523,7 +509,8 @@ config PRISM54 config USB_ZD1201 tristate "USB ZD1201 based Wireless device support" - depends on USB && NET_RADIO + depends on USB && WLAN_80211 + select WIRELESS_EXT select FW_LOADER ---help--- Say Y if you want to use wireless LAN adapters based on the ZyDAS @@ -542,11 +529,4 @@ source "drivers/net/wireless/hostap/Kconfig" source "drivers/net/wireless/bcm43xx/Kconfig" source "drivers/net/wireless/zd1211rw/Kconfig" -# yes, this works even when no drivers are selected -config NET_WIRELESS - bool - depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA) - default y - endmenu - diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 2ada76a93cb..7fe0a61091a 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2444,7 +2444,7 @@ static int add_airo_dev( struct net_device *dev ); static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr) { - memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); return ETH_ALEN; } @@ -3411,14 +3411,12 @@ badrx: OUT4500( apriv, EVACK, EV_RX); if (test_bit(FLAG_802_11, &apriv->flags)) { - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_OTHERHOST; skb->dev = apriv->wifidev; skb->protocol = htons(ETH_P_802_2); - } else { - skb->dev = dev; + } else skb->protocol = eth_type_trans(skb,dev); - } skb->dev->last_rx = jiffies; skb->ip_summed = CHECKSUM_NONE; @@ -3641,7 +3639,6 @@ badmic: } #endif /* WIRELESS_SPY */ - skb->dev = ai->dev; skb->ip_summed = CHECKSUM_NONE; skb->protocol = eth_type_trans(skb, ai->dev); skb->dev->last_rx = jiffies; @@ -3749,7 +3746,7 @@ void mpi_receive_802_11 (struct airo_info *ai) wireless_spy_update(ai->dev, sa, &wstats); } #endif /* IW_WIRELESS_SPY */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_OTHERHOST; skb->dev = ai->wifidev; skb->protocol = htons(ETH_P_802_2); diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c index 4688e56b69c..498e8486d12 100644 --- a/drivers/net/wireless/arlan-main.c +++ b/drivers/net/wireless/arlan-main.c @@ -1500,7 +1500,6 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short break; } skb_reserve(skb, 2); - skb->dev = dev; skbtmp = skb_put(skb, pkt_len); memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN); diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c index 23eba698aec..51a7db53afa 100644 --- a/drivers/net/wireless/atmel.c +++ b/drivers/net/wireless/atmel.c @@ -827,14 +827,14 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) if (priv->wep_is_on) frame_ctl |= IEEE80211_FCTL_PROTECTED; if (priv->operating_mode == IW_MODE_ADHOC) { - memcpy(&header.addr1, skb->data, 6); + skb_copy_from_linear_data(skb, &header.addr1, 6); memcpy(&header.addr2, dev->dev_addr, 6); memcpy(&header.addr3, priv->BSSID, 6); } else { frame_ctl |= IEEE80211_FCTL_TODS; memcpy(&header.addr1, priv->CurrentBSSID, 6); memcpy(&header.addr2, dev->dev_addr, 6); - memcpy(&header.addr3, skb->data, 6); + skb_copy_from_linear_data(skb, 
&header.addr3, 6); } if (priv->use_wpa) @@ -920,7 +920,6 @@ static void fast_rx_path(struct atmel_private *priv, memcpy(&skbp[6], header->addr2, 6); /* source address */ priv->dev->last_rx = jiffies; - skb->dev = priv->dev; skb->protocol = eth_type_trans(skb, priv->dev); skb->ip_summed = CHECKSUM_NONE; netif_rx(skb); @@ -1028,7 +1027,6 @@ static void frag_rx_path(struct atmel_private *priv, priv->rx_buf, priv->frag_len + 12); priv->dev->last_rx = jiffies; - skb->dev = priv->dev; skb->protocol = eth_type_trans(skb, priv->dev); skb->ip_summed = CHECKSUM_NONE; netif_rx(skb); diff --git a/drivers/net/wireless/bcm43xx/Kconfig b/drivers/net/wireless/bcm43xx/Kconfig index 533993f538f..ce397e4284f 100644 --- a/drivers/net/wireless/bcm43xx/Kconfig +++ b/drivers/net/wireless/bcm43xx/Kconfig @@ -1,6 +1,7 @@ config BCM43XX tristate "Broadcom BCM43xx wireless support" - depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL + depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL + select WIRELESS_EXT select FW_LOADER select HW_RANDOM ---help--- diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c index 6e0dc76400e..e3d2e61a31e 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c @@ -998,7 +998,8 @@ static void dma_tx_fragment(struct bcm43xx_dmaring *ring, assert(0); return; } - memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, skb_put(bounce_skb, skb->len), + skb->len); dev_kfree_skb_any(skb); skb = bounce_skb; } diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig index 308f773ad56..1fef33169fd 100644 --- a/drivers/net/wireless/hostap/Kconfig +++ b/drivers/net/wireless/hostap/Kconfig @@ -1,6 +1,7 @@ config HOSTAP tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)" - depends on NET_RADIO + depends on WLAN_80211 + select WIRELESS_EXT select IEEE80211 select IEEE80211_CRYPT_WEP ---help--- diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c index 7e04dc94b3b..cbedc9ee740 100644 --- a/drivers/net/wireless/hostap/hostap_80211_rx.c +++ b/drivers/net/wireless/hostap/hostap_80211_rx.c @@ -167,7 +167,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d ret = skb->len - phdrlen; skb->dev = dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, hdrlen); if (prism_header) skb_pull(skb, phdrlen); @@ -933,12 +933,14 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, if (frag == 0) { /* copy first fragment (including full headers) into * beginning of the fragment cache skb */ - memcpy(skb_put(frag_skb, flen), skb->data, flen); + skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), + flen); } else { /* append frame payload to the end of the fragment * cache skb */ - memcpy(skb_put(frag_skb, flen), skb->data + hdrlen, - flen); + skb_copy_from_linear_data_offset(skb, hdrlen, + skb_put(frag_skb, + flen), flen); } dev_kfree_skb(skb); skb = NULL; @@ -1044,8 +1046,9 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, skb->len >= ETH_HLEN + ETH_ALEN) { /* Non-standard frame: get addr4 from its bogus location after * the payload */ - memcpy(skb->data + ETH_ALEN, - skb->data + skb->len - ETH_ALEN, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, skb->len - ETH_ALEN, + skb->data + ETH_ALEN, + ETH_ALEN); skb_trim(skb, skb->len - ETH_ALEN); } @@ -1073,17 +1076,17 @@ void 
hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, if (skb2 != NULL) { /* send to wireless media */ - skb2->protocol = __constant_htons(ETH_P_802_3); - skb2->mac.raw = skb2->nh.raw = skb2->data; - /* skb2->nh.raw = skb2->data + ETH_HLEN; */ skb2->dev = dev; + skb2->protocol = __constant_htons(ETH_P_802_3); + skb_reset_mac_header(skb2); + skb_reset_network_header(skb2); + /* skb2->network_header += ETH_HLEN; */ dev_queue_xmit(skb2); } if (skb) { skb->protocol = eth_type_trans(skb, dev); memset(skb->cb, 0, sizeof(skb->cb)); - skb->dev = dev; netif_rx(skb); } diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c index 4a5be70c041..246fac0e800 100644 --- a/drivers/net/wireless/hostap/hostap_80211_tx.c +++ b/drivers/net/wireless/hostap/hostap_80211_tx.c @@ -146,7 +146,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev) fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; /* From&To DS: Addr1 = RA, Addr2 = TA, Addr3 = DA, * Addr4 = SA */ - memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, ETH_ALEN, + &hdr.addr4, ETH_ALEN); hdr_len += ETH_ALEN; } else { /* bogus 4-addr format to workaround Prism2 station @@ -159,7 +160,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev) /* SA from skb->data + ETH_ALEN will be added after * frame payload; use hdr.addr4 as a temporary buffer */ - memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, ETH_ALEN, + &hdr.addr4, ETH_ALEN); need_tailroom += ETH_ALEN; } @@ -174,24 +176,27 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev) else memcpy(&hdr.addr1, local->bssid, ETH_ALEN); memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(&hdr.addr3, skb->data, ETH_ALEN); + skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN); } else if (local->iw_mode == IW_MODE_MASTER && !to_assoc_ap) { fc |= IEEE80211_FCTL_FROMDS; /* From DS: Addr1 = DA, Addr2 = BSSID, Addr3 = SA */ - memcpy(&hdr.addr1, skb->data, ETH_ALEN); + skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN); memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(&hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr3, + ETH_ALEN); } else if (local->iw_mode == IW_MODE_INFRA || to_assoc_ap) { fc |= IEEE80211_FCTL_TODS; /* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */ memcpy(&hdr.addr1, to_assoc_ap ? 
local->assoc_ap_addr : local->bssid, ETH_ALEN); - memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(&hdr.addr3, skb->data, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2, + ETH_ALEN); + skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN); } else if (local->iw_mode == IW_MODE_ADHOC) { /* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */ - memcpy(&hdr.addr1, skb->data, ETH_ALEN); - memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2, + ETH_ALEN); memcpy(&hdr.addr3, local->bssid, ETH_ALEN); } @@ -237,7 +242,7 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev) iface->stats.tx_packets++; iface->stats.tx_bytes += skb->len; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); meta = (struct hostap_skb_tx_data *) skb->cb; memset(meta, 0, sizeof(*meta)); meta->magic = HOSTAP_SKB_TX_DATA_MAGIC; diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index efb8cf3bd8a..4ca8a27b8c5 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -982,7 +982,8 @@ static void prism2_send_mgmt(struct net_device *dev, meta->tx_cb_idx = tx_cb_idx; skb->dev = dev; - skb->mac.raw = skb->nh.raw = skb->data; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); dev_queue_xmit(skb); } #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ @@ -1276,8 +1277,8 @@ static char * ap_auth_make_challenge(struct ap_data *ap) return NULL; } - memcpy(tmpbuf, skb->data + ap->crypt->extra_mpdu_prefix_len, - WLAN_AUTH_CHALLENGE_LEN); + skb_copy_from_linear_data_offset(skb, ap->crypt->extra_mpdu_prefix_len, + tmpbuf, WLAN_AUTH_CHALLENGE_LEN); dev_kfree_skb(skb); return tmpbuf; diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 3079378fb8c..fb01fb95a9f 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -1838,13 +1838,14 @@ static int prism2_tx_80211(struct sk_buff *skb, struct net_device *dev) /* skb->data starts with txdesc->frame_control */ hdr_len = 24; - memcpy(&txdesc.frame_control, skb->data, hdr_len); + skb_copy_from_linear_data(skb, &txdesc.frame_control, hdr_len); fc = le16_to_cpu(txdesc.frame_control); if (WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA && (fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS) && skb->len >= 30) { /* Addr4 */ - memcpy(txdesc.addr4, skb->data + hdr_len, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, hdr_len, txdesc.addr4, + ETH_ALEN); hdr_len += ETH_ALEN; } @@ -2217,7 +2218,7 @@ static void hostap_tx_callback(local_info_t *local, memcpy(skb_put(skb, len), payload, len); skb->dev = local->dev; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); cb->func(skb, ok, cb->data); } diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 9077e6edde3..1f9edd91565 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -590,20 +590,20 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx) int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr) { - memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); /* addr2 */ + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ return ETH_ALEN; } int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr) { - if (*(u32 
*)skb->mac.raw == LWNG_CAP_DID_BASE) { - memcpy(haddr, skb->mac.raw + - sizeof(struct linux_wlan_ng_prism_hdr) + 10, + const unsigned char *mac = skb_mac_header(skb); + + if (*(u32 *)mac == LWNG_CAP_DID_BASE) { + memcpy(haddr, mac + sizeof(struct linux_wlan_ng_prism_hdr) + 10, ETH_ALEN); /* addr2 */ - } else { /* (*(u32 *)skb->mac.raw == htonl(LWNG_CAPHDR_VERSION)) */ - memcpy(haddr, skb->mac.raw + - sizeof(struct linux_wlan_ng_cap_hdr) + 10, + } else { /* (*(u32 *)mac == htonl(LWNG_CAPHDR_VERSION)) */ + memcpy(haddr, mac + sizeof(struct linux_wlan_ng_cap_hdr) + 10, ETH_ALEN); /* addr2 */ } return ETH_ALEN; @@ -1063,7 +1063,8 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype, meta->iface = netdev_priv(dev); skb->dev = dev; - skb->mac.raw = skb->nh.raw = skb->data; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); dev_queue_xmit(skb); return 0; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index ad6e4a42835..9137a4dd02e 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -2416,8 +2416,9 @@ static void isr_rx(struct ipw2100_priv *priv, int i, #ifdef IPW2100_RX_DEBUG /* Make a copy of the frame so we can dump it to the logs if * ieee80211_rx fails */ - memcpy(packet_data, packet->skb->data, - min_t(u32, status->frame_size, IPW_RX_NIC_BUFFER_LENGTH)); + skb_copy_from_linear_data(packet->skb, packet_data, + min_t(u32, status->frame_size, + IPW_RX_NIC_BUFFER_LENGTH)); #endif if (!ieee80211_rx(priv->ieee, packet->skb, stats)) { diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index c878a2f3239..4839a45098c 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -8133,7 +8133,7 @@ static void ipw_handle_mgmt_packet(struct ipw_priv *priv, skb->dev = priv->ieee->dev; /* Point raw at the ieee80211_stats */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->pkt_type = PACKET_OTHERHOST; skb->protocol = __constant_htons(ETH_P_80211_STATS); @@ -10355,7 +10355,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, rt_hdr->it_len = dst->len; - memcpy(skb_put(dst, len), src->data, len); + skb_copy_from_linear_data(src, skb_put(dst, len), len); if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats)) dev_kfree_skb_any(dst); diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index a009ab51771..45b00e13ab2 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -1283,7 +1283,6 @@ static int netwave_rx(struct net_device *dev) skb_reserve( skb, 2); /* Align IP on 16 byte */ skb_put( skb, rcvLen); - skb->dev = dev; /* Copy packet fragments to the skb data area */ ptr = (u_char*) skb->data; diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c index 4e7f6cf5143..062286dc8e1 100644 --- a/drivers/net/wireless/orinoco.c +++ b/drivers/net/wireless/orinoco.c @@ -689,7 +689,7 @@ static void orinoco_stat_gather(struct net_device *dev, /* Note : gcc will optimise the whole section away if * WIRELESS_SPY is not defined... 
- Jean II */ if (SPY_NUMBER(priv)) { - orinoco_spy_gather(dev, skb->mac.raw + ETH_ALEN, + orinoco_spy_gather(dev, skb_mac_header(skb) + ETH_ALEN, desc->signal, desc->silence); } } @@ -770,7 +770,7 @@ static void orinoco_rx_monitor(struct net_device *dev, u16 rxfid, /* Copy the 802.11 header to the skb */ memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen); - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); /* If any, copy the data from the card to the skb */ if (datalen > 0) { @@ -915,7 +915,6 @@ static void __orinoco_ev_rx(struct net_device *dev, hermes_t *hw) memcpy(hdr->h_source, desc.addr2, ETH_ALEN); dev->last_rx = jiffies; - skb->dev = dev; skb->protocol = eth_type_trans(skb, dev); skb->ip_summed = CHECKSUM_NONE; if (fc & IEEE80211_FCTL_TODS) diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c index b1122912ee2..dd070cccf32 100644 --- a/drivers/net/wireless/prism54/islpci_eth.c +++ b/drivers/net/wireless/prism54/islpci_eth.c @@ -136,7 +136,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev) printk("islpci_eth_transmit:wds_mac\n"); #endif memmove(skb->data + 6, src, skb->len); - memcpy(skb->data, wds_mac, 6); + skb_copy_to_linear_data(skb, wds_mac, 6); } else { memmove(skb->data, src, skb->len); } @@ -162,13 +162,16 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev) skb_put(newskb, init_wds ? skb->len + 6 : skb->len); if (init_wds) { - memcpy(newskb->data + 6, skb->data, skb->len); - memcpy(newskb->data, wds_mac, 6); + skb_copy_from_linear_data(skb, + newskb->data + 6, + skb->len); + skb_copy_to_linear_data(newskb, wds_mac, 6); #ifdef ISLPCI_ETH_DEBUG printk("islpci_eth_transmit:wds_mac\n"); #endif } else - memcpy(newskb->data, skb->data, skb->len); + skb_copy_from_linear_data(skb, newskb->data, + skb->len); #if VERBOSE > SHOW_ERROR_MESSAGES DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n", @@ -303,7 +306,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb) skb_pull(*skb, sizeof (struct rfmon_header)); (*skb)->protocol = htons(ETH_P_802_2); - (*skb)->mac.raw = (*skb)->data; + skb_reset_mac_header(*skb); (*skb)->pkt_type = PACKET_OTHERHOST; return 0; @@ -374,10 +377,6 @@ islpci_eth_receive(islpci_private *priv) DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data); display_buffer((char *) skb->data, skb->len); #endif - - /* do some additional sk_buff and network layer parameters */ - skb->dev = ndev; - /* take care of monitor mode and spy monitoring. 
*/ if (unlikely(priv->iw_mode == IW_MODE_MONITOR)) discard = islpci_monitor_rx(priv, &skb); @@ -398,8 +397,10 @@ islpci_eth_receive(islpci_private *priv) /* Update spy records */ wireless_spy_update(ndev, annex->addr2, &wstats); - memcpy(skb->data + sizeof (struct rfmon_header), - skb->data, 2 * ETH_ALEN); + skb_copy_from_linear_data(skb, + (skb->data + + sizeof(struct rfmon_header)), + 2 * ETH_ALEN); skb_pull(skb, sizeof (struct rfmon_header)); } skb->protocol = eth_type_trans(skb, ndev); diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 47b2ccb6a63..3be624295a1 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2232,7 +2232,6 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i return; } skb_reserve( skb, 2); /* Align IP on 16 byte (TBD check this)*/ - skb->dev = dev; DEBUG(4,"ray_cs rx_data total_len = %x, rx_len = %x\n",total_len,rx_len); @@ -2243,7 +2242,8 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i rx_ptr += copy_from_rx_buff(local, rx_ptr, pkt_addr & RX_BUFF_END, rx_len); /* Get source address */ #ifdef WIRELESS_SPY - memcpy(linksrcaddr, ((struct mac_header *)skb->data)->addr_2, ETH_ALEN); + skb_copy_from_linear_data_offset(skb, offsetof(struct mac_header, addr_2), + linksrcaddr, ETH_ALEN); #endif /* Now, deal with encapsulation/translation/sniffer */ if (!sniffer) { diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index f5ce1c6063d..2a299a0676a 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2009,7 +2009,7 @@ static void deliver_packet(struct strip *strip_info, STRIP_Header * header, packetlen); skb->dev = get_strip_dev(strip_info); skb->protocol = header->protocol; - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); /* Having put a fake header on the front of the sk_buff for the */ /* benefit of tools like tcpdump, skb_pull now 'consumes' that */ diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c index 2aa3c761dd8..1cf090d60ed 100644 --- a/drivers/net/wireless/wavelan.c +++ b/drivers/net/wireless/wavelan.c @@ -2512,14 +2512,13 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize) return; } - skb->dev = dev; - /* Copy the packet to the buffer. */ obram_read(ioaddr, buf_off, skb_put(skb, sksize), sksize); skb->protocol = eth_type_trans(skb, dev); #ifdef DEBUG_RX_INFO - wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read"); + wv_packet_info(skb_mac_header(skb), sksize, dev->name, + "wv_packet_read"); #endif /* DEBUG_RX_INFO */ /* Statistics-gathering and associated stuff. @@ -2555,7 +2554,7 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize) /* Spying stuff */ #ifdef IW_WIRELESS_SPY - wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, + wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats); #endif /* IW_WIRELESS_SPY */ #ifdef HISTOGRAM @@ -2939,7 +2938,7 @@ static int wavelan_packet_xmit(struct sk_buff *skb, struct net_device * dev) * need to pad. 
Jean II */ if (skb->len < ETH_ZLEN) { memset(data, 0, ETH_ZLEN); - memcpy(data, skb->data, skb->len); + skb_copy_from_linear_data(skb, data, skb->len); /* Write packet on the card */ if(wv_packet_write(dev, data, ETH_ZLEN)) return 1; /* We failed */ diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index b04239792f6..67b867f837c 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -2884,14 +2884,12 @@ wv_packet_read(struct net_device * dev, return; } - skb->dev = dev; - skb_reserve(skb, 2); fd_p = read_ringbuf(dev, fd_p, (char *) skb_put(skb, sksize), sksize); skb->protocol = eth_type_trans(skb, dev); #ifdef DEBUG_RX_INFO - wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read"); + wv_packet_info(skb_mac_header(skb), sksize, dev->name, "wv_packet_read"); #endif /* DEBUG_RX_INFO */ /* Statistics gathering & stuff associated. @@ -2925,7 +2923,7 @@ wv_packet_read(struct net_device * dev, #endif /* WAVELAN_ROAMING */ #ifdef WIRELESS_SPY - wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, stats); + wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats); #endif /* WIRELESS_SPY */ #ifdef HISTOGRAM wl_his_gather(dev, stats); diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c index 6cb66a356c9..935b144d9b5 100644 --- a/drivers/net/wireless/zd1201.c +++ b/drivers/net/wireless/zd1201.c @@ -327,7 +327,6 @@ static void zd1201_usbrx(struct urb *urb) memcpy(skb_put(skb, 6), &data[datalen-8], 6); memcpy(skb_put(skb, 2), &data[datalen-24], 2); memcpy(skb_put(skb, len), data, len); - skb->dev = zd->dev; skb->dev->last_rx = jiffies; skb->protocol = eth_type_trans(skb, zd->dev); zd->stats.rx_packets++; @@ -385,7 +384,6 @@ static void zd1201_usbrx(struct urb *urb) memcpy(skb_put(skb, 2), &data[6], 2); memcpy(skb_put(skb, len), data+8, len); } - skb->dev = zd->dev; skb->dev->last_rx = jiffies; skb->protocol = eth_type_trans(skb, zd->dev); zd->stats.rx_packets++; @@ -809,10 +807,10 @@ static int zd1201_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) txbuf[4] = 0x00; txbuf[5] = 0x00; - memcpy(txbuf+6, skb->data+12, skb->len-12); + skb_copy_from_linear_data_offset(skb, 12, txbuf + 6, skb->len - 12); if (pad) txbuf[skb->len-12+6]=0; - memcpy(txbuf+skb->len-12+6+pad, skb->data, 12); + skb_copy_from_linear_data(skb, txbuf + skb->len - 12 + 6 + pad, 12); *(__be16*)&txbuf[skb->len+6+pad] = htons(skb->len-12+6); txbuf[txbuflen-1] = 0; diff --git a/drivers/net/wireless/zd1211rw/Kconfig b/drivers/net/wireless/zd1211rw/Kconfig index 66ed55bc546..d1ab24a9563 100644 --- a/drivers/net/wireless/zd1211rw/Kconfig +++ b/drivers/net/wireless/zd1211rw/Kconfig @@ -1,6 +1,7 @@ config ZD1211RW tristate "ZyDAS ZD1211/ZD1211B USB-wireless support" - depends on USB && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL + depends on USB && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL + select WIRELESS_EXT select FW_LOADER ---help--- This is an experimental driver for the ZyDAS ZD1211/ZD1211B wireless diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 2412ce4917f..3f4a7cf9efe 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -1137,7 +1137,6 @@ static int yellowfin_rx(struct net_device *dev) skb = dev_alloc_skb(pkt_len + 2); if (skb == NULL) break; - skb->dev = dev; skb_reserve(skb, 2); /* 16 byte align the IP header */ eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0); skb_put(skb, pkt_len); diff --git a/drivers/net/znet.c b/drivers/net/znet.c index 
b24b0727108..4032e9f6f9b 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -774,7 +774,6 @@ static void znet_rx(struct net_device *dev) znet->stats.rx_dropped++; break; } - skb->dev = dev; if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) { int semi_cnt = (znet->rx_end - znet->rx_cur)<<1; diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index d190c05d87e..453e6829756 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -372,9 +372,9 @@ static __inline__ int led_get_net_activity(void) continue; if (LOOPBACK(in_dev->ifa_list->ifa_local)) continue; - if (!dev->get_stats) - continue; stats = dev->get_stats(dev); + if (!stats) + continue; rx_total += stats->rx_packets; tx_total += stats->tx_packets; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index eb5dc62f0d9..e71929db8b0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -398,6 +398,9 @@ dasd_change_state(struct dasd_device *device) if (device->state == device->target) wake_up(&dasd_init_waitq); + + /* let user-space know that the device status changed */ + kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); } /* diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index ed70852cc91..6a89cefe99b 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -19,6 +19,7 @@ #include <asm/debug.h> #include <asm/uaccess.h> +#include <asm/ipl.h> /* This is ugly... */ #define PRINTK_HEADER "dasd_devmap:" @@ -133,6 +134,8 @@ dasd_call_setup(char *str) __setup ("dasd=", dasd_call_setup); #endif /* #ifndef MODULE */ +#define DASD_IPLDEV "ipldev" + /* * Read a device busid/devno from a string. */ @@ -141,6 +144,20 @@ dasd_busid(char **str, int *id0, int *id1, int *devno) { int val, old_style; + /* Interpret ipldev busid */ + if (strncmp(DASD_IPLDEV, *str, strlen(DASD_IPLDEV)) == 0) { + if (ipl_info.type != IPL_TYPE_CCW) { + MESSAGE(KERN_ERR, "%s", "ipl device is not a ccw " + "device"); + return -EINVAL; + } + *id0 = 0; + *id1 = ipl_info.data.ccw.dev_id.ssid; + *devno = ipl_info.data.ccw.dev_id.devno; + *str += strlen(DASD_IPLDEV); + + return 0; + } /* check for leading '0x' */ old_style = 0; if ((*str)[0] == '0' && (*str)[1] == 'x') { @@ -829,6 +846,46 @@ dasd_discipline_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(discipline, 0444, dasd_discipline_show, NULL); static ssize_t +dasd_device_status_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dasd_device *device; + ssize_t len; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (!IS_ERR(device)) { + switch (device->state) { + case DASD_STATE_NEW: + len = snprintf(buf, PAGE_SIZE, "new\n"); + break; + case DASD_STATE_KNOWN: + len = snprintf(buf, PAGE_SIZE, "detected\n"); + break; + case DASD_STATE_BASIC: + len = snprintf(buf, PAGE_SIZE, "basic\n"); + break; + case DASD_STATE_UNFMT: + len = snprintf(buf, PAGE_SIZE, "unformatted\n"); + break; + case DASD_STATE_READY: + len = snprintf(buf, PAGE_SIZE, "ready\n"); + break; + case DASD_STATE_ONLINE: + len = snprintf(buf, PAGE_SIZE, "online\n"); + break; + default: + len = snprintf(buf, PAGE_SIZE, "no stat\n"); + break; + } + dasd_put_device(device); + } else + len = snprintf(buf, PAGE_SIZE, "unknown\n"); + return len; +} + +static DEVICE_ATTR(status, 0444, dasd_device_status_show, NULL); + +static ssize_t dasd_alias_show(struct device *dev, struct device_attribute *attr, char *buf) { struct dasd_devmap *devmap; @@ -939,6 +996,7 @@ static DEVICE_ATTR(eer_enabled, 
0644, dasd_eer_show, dasd_eer_store); static struct attribute * dasd_attrs[] = { &dev_attr_readonly.attr, &dev_attr_discipline.attr, + &dev_attr_status.attr, &dev_attr_alias.attr, &dev_attr_vendor.attr, &dev_attr_uid.attr, diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 293e667b50f..c210784bdf4 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -3,7 +3,7 @@ # obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ - sclp_info.o + sclp_info.o sclp_config.o sclp_chp.o obj-$(CONFIG_TN3270) += raw3270.o obj-$(CONFIG_TN3270_CONSOLE) += con3270.o @@ -29,3 +29,6 @@ obj-$(CONFIG_S390_TAPE_34XX) += tape_34xx.o obj-$(CONFIG_S390_TAPE_3590) += tape_3590.o obj-$(CONFIG_MONREADER) += monreader.o obj-$(CONFIG_MONWRITER) += monwriter.o + +zcore_mod-objs := sclp_sdias.o zcore.o +obj-$(CONFIG_ZFCPDUMP) += zcore_mod.o diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 9a328f14a64..6000bdee408 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -813,12 +813,6 @@ con3215_unblank(void) spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); } -static int __init -con3215_consetup(struct console *co, char *options) -{ - return 0; -} - /* * The console structure for the 3215 console */ @@ -827,7 +821,6 @@ static struct console con3215 = { .write = con3215_write, .device = con3215_device, .unblank = con3215_unblank, - .setup = con3215_consetup, .flags = CON_PRINTBUFFER, }; diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 8e7f2d7633d..fd3479119eb 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -555,12 +555,6 @@ con3270_unblank(void) spin_unlock_irqrestore(&cp->view.lock, flags); } -static int __init -con3270_consetup(struct console *co, char *options) -{ - return 0; -} - /* * The console structure for the 3270 console */ @@ -569,7 +563,6 @@ static struct console con3270 = { .write = con3270_write, .device = con3270_device, .unblank = con3270_unblank, - .setup = con3270_consetup, .flags = CON_PRINTBUFFER, }; diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index f171de3b0b1..fa62e694405 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -15,6 +15,7 @@ #include <linux/timer.h> #include <linux/reboot.h> #include <linux/jiffies.h> +#include <linux/init.h> #include <asm/types.h> #include <asm/s390_ext.h> @@ -510,7 +511,7 @@ sclp_state_change_cb(struct evbuf_header *evbuf) } static struct sclp_register sclp_state_change_event = { - .receive_mask = EvTyp_StateChange_Mask, + .receive_mask = EVTYP_STATECHANGE_MASK, .receiver_fn = sclp_state_change_cb }; @@ -930,3 +931,10 @@ sclp_init(void) sclp_init_mask(1); return 0; } + +static __init int sclp_initcall(void) +{ + return sclp_init(); +} + +arch_initcall(sclp_initcall); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 7d29ab45a6e..87ac4a3ad49 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -19,33 +19,37 @@ #define MAX_KMEM_PAGES (sizeof(unsigned long) << 3) #define MAX_CONSOLE_PAGES 4 -#define EvTyp_OpCmd 0x01 -#define EvTyp_Msg 0x02 -#define EvTyp_StateChange 0x08 -#define EvTyp_PMsgCmd 0x09 -#define EvTyp_CntlProgOpCmd 0x20 -#define EvTyp_CntlProgIdent 0x0B -#define EvTyp_SigQuiesce 0x1D -#define EvTyp_VT220Msg 0x1A - -#define EvTyp_OpCmd_Mask 0x80000000 -#define EvTyp_Msg_Mask 0x40000000 -#define EvTyp_StateChange_Mask 0x01000000 -#define EvTyp_PMsgCmd_Mask 0x00800000 -#define EvTyp_CtlProgOpCmd_Mask 
0x00000001 -#define EvTyp_CtlProgIdent_Mask 0x00200000 -#define EvTyp_SigQuiesce_Mask 0x00000008 -#define EvTyp_VT220Msg_Mask 0x00000040 - -#define GnrlMsgFlgs_DOM 0x8000 -#define GnrlMsgFlgs_SndAlrm 0x4000 -#define GnrlMsgFlgs_HoldMsg 0x2000 - -#define LnTpFlgs_CntlText 0x8000 -#define LnTpFlgs_LabelText 0x4000 -#define LnTpFlgs_DataText 0x2000 -#define LnTpFlgs_EndText 0x1000 -#define LnTpFlgs_PromptText 0x0800 +#define EVTYP_OPCMD 0x01 +#define EVTYP_MSG 0x02 +#define EVTYP_STATECHANGE 0x08 +#define EVTYP_PMSGCMD 0x09 +#define EVTYP_CNTLPROGOPCMD 0x20 +#define EVTYP_CNTLPROGIDENT 0x0B +#define EVTYP_SIGQUIESCE 0x1D +#define EVTYP_VT220MSG 0x1A +#define EVTYP_CONFMGMDATA 0x04 +#define EVTYP_SDIAS 0x1C + +#define EVTYP_OPCMD_MASK 0x80000000 +#define EVTYP_MSG_MASK 0x40000000 +#define EVTYP_STATECHANGE_MASK 0x01000000 +#define EVTYP_PMSGCMD_MASK 0x00800000 +#define EVTYP_CTLPROGOPCMD_MASK 0x00000001 +#define EVTYP_CTLPROGIDENT_MASK 0x00200000 +#define EVTYP_SIGQUIESCE_MASK 0x00000008 +#define EVTYP_VT220MSG_MASK 0x00000040 +#define EVTYP_CONFMGMDATA_MASK 0x10000000 +#define EVTYP_SDIAS_MASK 0x00000010 + +#define GNRLMSGFLGS_DOM 0x8000 +#define GNRLMSGFLGS_SNDALRM 0x4000 +#define GNRLMSGFLGS_HOLDMSG 0x2000 + +#define LNTPFLGS_CNTLTEXT 0x8000 +#define LNTPFLGS_LABELTEXT 0x4000 +#define LNTPFLGS_DATATEXT 0x2000 +#define LNTPFLGS_ENDTEXT 0x1000 +#define LNTPFLGS_PROMPTTEXT 0x0800 typedef unsigned int sclp_cmdw_t; @@ -56,15 +60,15 @@ typedef unsigned int sclp_cmdw_t; #define SCLP_CMDW_READ_SCP_INFO_FORCED 0x00120001 #define GDS_ID_MDSMU 0x1310 -#define GDS_ID_MDSRouteInfo 0x1311 -#define GDS_ID_AgUnWrkCorr 0x1549 -#define GDS_ID_SNACondReport 0x1532 +#define GDS_ID_MDSROUTEINFO 0x1311 +#define GDS_ID_AGUNWRKCORR 0x1549 +#define GDS_ID_SNACONDREPORT 0x1532 #define GDS_ID_CPMSU 0x1212 -#define GDS_ID_RoutTargInstr 0x154D -#define GDS_ID_OpReq 0x8070 -#define GDS_ID_TextCmd 0x1320 +#define GDS_ID_ROUTTARGINSTR 0x154D +#define GDS_ID_OPREQ 0x8070 +#define GDS_ID_TEXTCMD 0x1320 -#define GDS_KEY_SelfDefTextMsg 0x31 +#define GDS_KEY_SELFDEFTEXTMSG 0x31 typedef u32 sccb_mask_t; /* ATTENTION: assumes 32bit mask !!! */ diff --git a/drivers/s390/char/sclp_chp.c b/drivers/s390/char/sclp_chp.c new file mode 100644 index 00000000000..a66b914519b --- /dev/null +++ b/drivers/s390/char/sclp_chp.c @@ -0,0 +1,196 @@ +/* + * drivers/s390/char/sclp_chp.c + * + * Copyright IBM Corp. 
2007 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <linux/completion.h> +#include <asm/sclp.h> +#include <asm/chpid.h> + +#include "sclp.h" + +#define TAG "sclp_chp: " + +#define SCLP_CMDW_CONFIGURE_CHANNEL_PATH 0x000f0001 +#define SCLP_CMDW_DECONFIGURE_CHANNEL_PATH 0x000e0001 +#define SCLP_CMDW_READ_CHANNEL_PATH_INFORMATION 0x00030001 + +static inline sclp_cmdw_t get_configure_cmdw(struct chp_id chpid) +{ + return SCLP_CMDW_CONFIGURE_CHANNEL_PATH | chpid.id << 8; +} + +static inline sclp_cmdw_t get_deconfigure_cmdw(struct chp_id chpid) +{ + return SCLP_CMDW_DECONFIGURE_CHANNEL_PATH | chpid.id << 8; +} + +static void chp_callback(struct sclp_req *req, void *data) +{ + struct completion *completion = data; + + complete(completion); +} + +struct chp_cfg_sccb { + struct sccb_header header; + u8 ccm; + u8 reserved[6]; + u8 cssid; +} __attribute__((packed)); + +struct chp_cfg_data { + struct chp_cfg_sccb sccb; + struct sclp_req req; + struct completion completion; +} __attribute__((packed)); + +static int do_configure(sclp_cmdw_t cmd) +{ + struct chp_cfg_data *data; + int rc; + + /* Prepare sccb. */ + data = (struct chp_cfg_data *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + data->sccb.header.length = sizeof(struct chp_cfg_sccb); + data->req.command = cmd; + data->req.sccb = &(data->sccb); + data->req.status = SCLP_REQ_FILLED; + data->req.callback = chp_callback; + data->req.callback_data = &(data->completion); + init_completion(&data->completion); + + /* Perform sclp request. */ + rc = sclp_add_request(&(data->req)); + if (rc) + goto out; + wait_for_completion(&data->completion); + + /* Check response. */ + if (data->req.status != SCLP_REQ_DONE) { + printk(KERN_WARNING TAG "configure channel-path request failed " + "(status=0x%02x)\n", data->req.status); + rc = -EIO; + goto out; + } + switch (data->sccb.header.response_code) { + case 0x0020: + case 0x0120: + case 0x0440: + case 0x0450: + break; + default: + printk(KERN_WARNING TAG "configure channel-path failed " + "(cmd=0x%08x, response=0x%04x)\n", cmd, + data->sccb.header.response_code); + rc = -EIO; + break; + } +out: + free_page((unsigned long) data); + + return rc; +} + +/** + * sclp_chp_configure - perform configure channel-path sclp command + * @chpid: channel-path ID + * + * Perform the configure channel-path sclp command for the specified chpid. + * Return 0 if the command finished successfully, non-zero otherwise. + */ +int sclp_chp_configure(struct chp_id chpid) +{ + return do_configure(get_configure_cmdw(chpid)); +} + +/** + * sclp_chp_deconfigure - perform deconfigure channel-path sclp command + * @chpid: channel-path ID + * + * Perform the deconfigure channel-path sclp command for the specified chpid + * and wait for completion. On success return 0. Return non-zero otherwise.
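+ *
+ * A minimal caller sketch (illustrative only; assumes chp_id_init() from
+ * <asm/chpid.h> and a hypothetical CHPID 0x40):
+ *
+ *	struct chp_id chpid;
+ *
+ *	chp_id_init(&chpid);
+ *	chpid.id = 0x40;
+ *	if (sclp_chp_deconfigure(chpid))
+ *		printk(KERN_WARNING "chpid 0x40 not deconfigured\n");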
+ */ +int sclp_chp_deconfigure(struct chp_id chpid) +{ + return do_configure(get_deconfigure_cmdw(chpid)); +} + +struct chp_info_sccb { + struct sccb_header header; + u8 recognized[SCLP_CHP_INFO_MASK_SIZE]; + u8 standby[SCLP_CHP_INFO_MASK_SIZE]; + u8 configured[SCLP_CHP_INFO_MASK_SIZE]; + u8 ccm; + u8 reserved[6]; + u8 cssid; +} __attribute__((packed)); + +struct chp_info_data { + struct chp_info_sccb sccb; + struct sclp_req req; + struct completion completion; +} __attribute__((packed)); + +/** + * sclp_chp_read_info - perform read channel-path information sclp command + * @info: resulting channel-path information data + * + * Perform the read channel-path information sclp command and wait for + * completion. On success, store channel-path information in @info and return + * 0. Return non-zero otherwise. + */ +int sclp_chp_read_info(struct sclp_chp_info *info) +{ + struct chp_info_data *data; + int rc; + + /* Prepare sccb. */ + data = (struct chp_info_data *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!data) + return -ENOMEM; + data->sccb.header.length = sizeof(struct chp_info_sccb); + data->req.command = SCLP_CMDW_READ_CHANNEL_PATH_INFORMATION; + data->req.sccb = &(data->sccb); + data->req.status = SCLP_REQ_FILLED; + data->req.callback = chp_callback; + data->req.callback_data = &(data->completion); + init_completion(&data->completion); + + /* Perform sclp request. */ + rc = sclp_add_request(&(data->req)); + if (rc) + goto out; + wait_for_completion(&data->completion); + + /* Check response. */ + if (data->req.status != SCLP_REQ_DONE) { + printk(KERN_WARNING TAG "read channel-path info request failed " + "(status=0x%02x)\n", data->req.status); + rc = -EIO; + goto out; + } + if (data->sccb.header.response_code != 0x0010) { + printk(KERN_WARNING TAG "read channel-path info failed " + "(response=0x%04x)\n", data->sccb.header.response_code); + rc = -EIO; + goto out; + } + memcpy(info->recognized, data->sccb.recognized, + SCLP_CHP_INFO_MASK_SIZE); + memcpy(info->standby, data->sccb.standby, + SCLP_CHP_INFO_MASK_SIZE); + memcpy(info->configured, data->sccb.configured, + SCLP_CHP_INFO_MASK_SIZE); +out: + free_page((unsigned long) data); + + return rc; +} diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c new file mode 100644 index 00000000000..5322e5e54a9 --- /dev/null +++ b/drivers/s390/char/sclp_config.c @@ -0,0 +1,75 @@ +/* + * drivers/s390/char/sclp_config.c + * + * Copyright IBM Corp.
2007 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> + */ + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/cpu.h> +#include <linux/sysdev.h> +#include <linux/workqueue.h> +#include "sclp.h" + +#define TAG "sclp_config: " + +struct conf_mgm_data { + u8 reserved; + u8 ev_qualifier; +} __attribute__((packed)); + +#define EV_QUAL_CAP_CHANGE 3 + +static struct work_struct sclp_cpu_capability_work; + +static void sclp_cpu_capability_notify(struct work_struct *work) +{ + int cpu; + struct sys_device *sysdev; + + printk(KERN_WARNING TAG "cpu capability changed.\n"); + lock_cpu_hotplug(); + for_each_online_cpu(cpu) { + sysdev = get_cpu_sysdev(cpu); + kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); + } + unlock_cpu_hotplug(); +} + +static void sclp_conf_receiver_fn(struct evbuf_header *evbuf) +{ + struct conf_mgm_data *cdata; + + cdata = (struct conf_mgm_data *)(evbuf + 1); + if (cdata->ev_qualifier == EV_QUAL_CAP_CHANGE) + schedule_work(&sclp_cpu_capability_work); +} + +static struct sclp_register sclp_conf_register = +{ + .receive_mask = EVTYP_CONFMGMDATA_MASK, + .receiver_fn = sclp_conf_receiver_fn, +}; + +static int __init sclp_conf_init(void) +{ + int rc; + + INIT_WORK(&sclp_cpu_capability_work, sclp_cpu_capability_notify); + + rc = sclp_register(&sclp_conf_register); + if (rc) { + printk(KERN_ERR TAG "failed to register (%d).\n", rc); + return rc; + } + + if (!(sclp_conf_register.sclp_receive_mask & EVTYP_CONFMGMDATA_MASK)) { + printk(KERN_WARNING TAG "no configuration management.\n"); + sclp_unregister(&sclp_conf_register); + rc = -ENOSYS; + } + return rc; +} + +__initcall(sclp_conf_init); diff --git a/drivers/s390/char/sclp_cpi.c b/drivers/s390/char/sclp_cpi.c index 65aa2c85737..29fe2a5ec2f 100644 --- a/drivers/s390/char/sclp_cpi.c +++ b/drivers/s390/char/sclp_cpi.c @@ -46,7 +46,7 @@ struct cpi_sccb { /* Event type structure for write message and write priority message */ static struct sclp_register sclp_cpi_event = { - .send_mask = EvTyp_CtlProgIdent_Mask + .send_mask = EVTYP_CTLPROGIDENT_MASK }; MODULE_LICENSE("GPL"); @@ -201,7 +201,7 @@ cpi_module_init(void) "console.\n"); return -EINVAL; } - if (!(sclp_cpi_event.sclp_send_mask & EvTyp_CtlProgIdent_Mask)) { + if (!(sclp_cpi_event.sclp_send_mask & EVTYP_CTLPROGIDENT_MASK)) { printk(KERN_WARNING "cpi: no control program identification " "support\n"); sclp_unregister(&sclp_cpi_event); diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index baa8fe669ed..45ff25e787c 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -43,7 +43,7 @@ sclp_quiesce_handler(struct evbuf_header *evbuf) } static struct sclp_register sclp_quiesce_event = { - .receive_mask = EvTyp_SigQuiesce_Mask, + .receive_mask = EVTYP_SIGQUIESCE_MASK, .receiver_fn = sclp_quiesce_handler }; diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c index 2486783ea58..bbd5b8b66f4 100644 --- a/drivers/s390/char/sclp_rw.c +++ b/drivers/s390/char/sclp_rw.c @@ -30,7 +30,7 @@ /* Event type structure for write message and write priority message */ static struct sclp_register sclp_rw_event = { - .send_mask = EvTyp_Msg_Mask | EvTyp_PMsgCmd_Mask + .send_mask = EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK }; /* @@ -64,7 +64,7 @@ sclp_make_buffer(void *page, unsigned short columns, unsigned short htab) memset(sccb, 0, sizeof(struct write_sccb)); sccb->header.length = sizeof(struct write_sccb); sccb->msg_buf.header.length = sizeof(struct msg_buf); - sccb->msg_buf.header.type = EvTyp_Msg; + 
sccb->msg_buf.header.type = EVTYP_MSG; sccb->msg_buf.mdb.header.length = sizeof(struct mdb); sccb->msg_buf.mdb.header.type = 1; sccb->msg_buf.mdb.header.tag = 0xD4C4C240; /* ebcdic "MDB " */ @@ -114,7 +114,7 @@ sclp_initialize_mto(struct sclp_buffer *buffer, int max_len) memset(mto, 0, sizeof(struct mto)); mto->length = sizeof(struct mto); mto->type = 4; /* message text object */ - mto->line_type_flags = LnTpFlgs_EndText; /* end text */ + mto->line_type_flags = LNTPFLGS_ENDTEXT; /* end text */ /* set pointer to first byte after struct mto. */ buffer->current_line = (char *) (mto + 1); @@ -215,7 +215,7 @@ sclp_write(struct sclp_buffer *buffer, const unsigned char *msg, int count) case '\a': /* bell, one for several times */ /* set SCLP sound alarm bit in General Object */ buffer->sccb->msg_buf.mdb.go.general_msg_flags |= - GnrlMsgFlgs_SndAlrm; + GNRLMSGFLGS_SNDALRM; break; case '\t': /* horizontal tabulator */ /* check if new mto needs to be created */ @@ -452,12 +452,12 @@ sclp_emit_buffer(struct sclp_buffer *buffer, return -EIO; sccb = buffer->sccb; - if (sclp_rw_event.sclp_send_mask & EvTyp_Msg_Mask) + if (sclp_rw_event.sclp_send_mask & EVTYP_MSG_MASK) /* Use normal write message */ - sccb->msg_buf.header.type = EvTyp_Msg; - else if (sclp_rw_event.sclp_send_mask & EvTyp_PMsgCmd_Mask) + sccb->msg_buf.header.type = EVTYP_MSG; + else if (sclp_rw_event.sclp_send_mask & EVTYP_PMSGCMD_MASK) /* Use write priority message */ - sccb->msg_buf.header.type = EvTyp_PMsgCmd; + sccb->msg_buf.header.type = EVTYP_PMSGCMD; else return -ENOSYS; buffer->request.command = SCLP_CMDW_WRITE_EVENT_DATA; diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c new file mode 100644 index 00000000000..52283daddae --- /dev/null +++ b/drivers/s390/char/sclp_sdias.c @@ -0,0 +1,255 @@ +/* + * SCLP "store data in absolute storage" + * + * Copyright IBM Corp. 2003,2007 + * Author(s): Michael Holzheu + */ + +#include <linux/sched.h> +#include <asm/sclp.h> +#include <asm/debug.h> +#include <asm/ipl.h> +#include "sclp.h" +#include "sclp_rw.h" + +#define TRACE(x...) debug_sprintf_event(sdias_dbf, 1, x) +#define ERROR_MSG(x...)
printk ( KERN_ALERT "SDIAS: " x ) + +#define SDIAS_RETRIES 300 +#define SDIAS_SLEEP_TICKS 50 + +#define EQ_STORE_DATA 0x0 +#define EQ_SIZE 0x1 +#define DI_FCP_DUMP 0x0 +#define ASA_SIZE_32 0x0 +#define ASA_SIZE_64 0x1 +#define EVSTATE_ALL_STORED 0x0 +#define EVSTATE_NO_DATA 0x3 +#define EVSTATE_PART_STORED 0x10 + +static struct debug_info *sdias_dbf; + +static struct sclp_register sclp_sdias_register = { + .send_mask = EVTYP_SDIAS_MASK, +}; + +struct sdias_evbuf { + struct evbuf_header hdr; + u8 event_qual; + u8 data_id; + u64 reserved2; + u32 event_id; + u16 reserved3; + u8 asa_size; + u8 event_status; + u32 reserved4; + u32 blk_cnt; + u64 asa; + u32 reserved5; + u32 fbn; + u32 reserved6; + u32 lbn; + u16 reserved7; + u16 dbs; +} __attribute__((packed)); + +struct sdias_sccb { + struct sccb_header hdr; + struct sdias_evbuf evbuf; +} __attribute__((packed)); + +static struct sdias_sccb sccb __attribute__((aligned(4096))); + +static int sclp_req_done; +static wait_queue_head_t sdias_wq; +static DEFINE_MUTEX(sdias_mutex); + +static void sdias_callback(struct sclp_req *request, void *data) +{ + struct sdias_sccb *sccb; + + sccb = (struct sdias_sccb *) request->sccb; + sclp_req_done = 1; + wake_up(&sdias_wq); /* Inform caller, that request is complete */ + TRACE("callback done\n"); +} + +static int sdias_sclp_send(struct sclp_req *req) +{ + int retries; + int rc; + + for (retries = SDIAS_RETRIES; retries; retries--) { + sclp_req_done = 0; + TRACE("add request\n"); + rc = sclp_add_request(req); + if (rc) { + /* not initiated, wait some time and retry */ + set_current_state(TASK_INTERRUPTIBLE); + TRACE("add request failed: rc = %i\n",rc); + schedule_timeout(SDIAS_SLEEP_TICKS); + continue; + } + /* initiated, wait for completion of service call */ + wait_event(sdias_wq, (sclp_req_done == 1)); + if (req->status == SCLP_REQ_FAILED) { + TRACE("sclp request failed\n"); + rc = -EIO; + continue; + } + TRACE("request done\n"); + break; + } + return rc; +} + +/* + * Get number of blocks (4K) available in the HSA + */ +int sclp_sdias_blk_count(void) +{ + struct sclp_req request; + int rc; + + mutex_lock(&sdias_mutex); + + memset(&sccb, 0, sizeof(sccb)); + memset(&request, 0, sizeof(request)); + + sccb.hdr.length = sizeof(sccb); + sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf); + sccb.evbuf.hdr.type = EVTYP_SDIAS; + sccb.evbuf.event_qual = EQ_SIZE; + sccb.evbuf.data_id = DI_FCP_DUMP; + sccb.evbuf.event_id = 4712; + sccb.evbuf.dbs = 1; + + request.sccb = &sccb; + request.command = SCLP_CMDW_WRITE_EVENT_DATA; + request.status = SCLP_REQ_FILLED; + request.callback = sdias_callback; + + rc = sdias_sclp_send(&request); + if (rc) { + ERROR_MSG("sclp_send failed for get_nr_blocks\n"); + goto out; + } + if (sccb.hdr.response_code != 0x0020) { + TRACE("send failed: %x\n", sccb.hdr.response_code); + rc = -EIO; + goto out; + } + + switch (sccb.evbuf.event_status) { + case 0: + rc = sccb.evbuf.blk_cnt; + break; + default: + ERROR_MSG("SCLP error: %x\n", sccb.evbuf.event_status); + rc = -EIO; + goto out; + } + TRACE("%i blocks\n", rc); +out: + mutex_unlock(&sdias_mutex); + return rc; +} + +/* + * Copy from HSA to absolute storage (not reentrant): + * + * @dest : Address of buffer where data should be copied + * @start_blk: Start Block (beginning with 1) + * @nr_blks : Number of 4K blocks to copy + * + * Return Value: 0 : Requested 'number' of blocks of data copied + * <0: ERROR - negative event status + */ +int sclp_sdias_copy(void *dest, int start_blk, int nr_blks) +{ + struct sclp_req request; + int rc; + + 
mutex_lock(&sdias_mutex); + + memset(&sccb, 0, sizeof(sccb)); + memset(&request, 0, sizeof(request)); + + sccb.hdr.length = sizeof(sccb); + sccb.evbuf.hdr.length = sizeof(struct sdias_evbuf); + sccb.evbuf.hdr.type = EVTYP_SDIAS; + sccb.evbuf.hdr.flags = 0; + sccb.evbuf.event_qual = EQ_STORE_DATA; + sccb.evbuf.data_id = DI_FCP_DUMP; + sccb.evbuf.event_id = 4712; +#ifdef __s390x__ + sccb.evbuf.asa_size = ASA_SIZE_64; +#else + sccb.evbuf.asa_size = ASA_SIZE_32; +#endif + sccb.evbuf.event_status = 0; + sccb.evbuf.blk_cnt = nr_blks; + sccb.evbuf.asa = (unsigned long)dest; + sccb.evbuf.fbn = start_blk; + sccb.evbuf.lbn = 0; + sccb.evbuf.dbs = 1; + + request.sccb = &sccb; + request.command = SCLP_CMDW_WRITE_EVENT_DATA; + request.status = SCLP_REQ_FILLED; + request.callback = sdias_callback; + + rc = sdias_sclp_send(&request); + if (rc) { + ERROR_MSG("sclp_send failed: %x\n", rc); + goto out; + } + if (sccb.hdr.response_code != 0x0020) { + TRACE("copy failed: %x\n", sccb.hdr.response_code); + rc = -EIO; + goto out; + } + + switch (sccb.evbuf.event_status) { + case EVSTATE_ALL_STORED: + TRACE("all stored\n"); + case EVSTATE_PART_STORED: + TRACE("part stored: %i\n", sccb.evbuf.blk_cnt); + break; + case EVSTATE_NO_DATA: + TRACE("no data\n"); + default: + ERROR_MSG("Error from SCLP while copying hsa. " + "Event status = %x\n", + sccb.evbuf.event_status); + rc = -EIO; + } +out: + mutex_unlock(&sdias_mutex); + return rc; +} + +int __init sdias_init(void) +{ + int rc; + + if (ipl_info.type != IPL_TYPE_FCP_DUMP) + return 0; + sdias_dbf = debug_register("dump_sdias", 4, 1, 4 * sizeof(long)); + debug_register_view(sdias_dbf, &debug_sprintf_view); + debug_set_level(sdias_dbf, 6); + rc = sclp_register(&sclp_sdias_register); + if (rc) { + ERROR_MSG("sclp register failed\n"); + return rc; + } + init_waitqueue_head(&sdias_wq); + TRACE("init done\n"); + return 0; +} + +void __exit sdias_exit(void) +{ + debug_unregister(sdias_dbf); + sclp_unregister(&sclp_sdias_register); +} diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 076816b9d52..e3b3d390b4a 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -648,7 +648,7 @@ sclp_eval_textcmd(struct gds_subvector *start, subvec = start; while (subvec < end) { subvec = find_gds_subvector(subvec, end, - GDS_KEY_SelfDefTextMsg); + GDS_KEY_SELFDEFTEXTMSG); if (!subvec) break; sclp_eval_selfdeftextmsg((struct gds_subvector *)(subvec + 1), @@ -664,7 +664,7 @@ sclp_eval_cpmsu(struct gds_vector *start, struct gds_vector *end) vec = start; while (vec < end) { - vec = find_gds_vector(vec, end, GDS_ID_TextCmd); + vec = find_gds_vector(vec, end, GDS_ID_TEXTCMD); if (!vec) break; sclp_eval_textcmd((struct gds_subvector *)(vec + 1), @@ -703,7 +703,7 @@ sclp_tty_state_change(struct sclp_register *reg) static struct sclp_register sclp_input_event = { - .receive_mask = EvTyp_OpCmd_Mask | EvTyp_PMsgCmd_Mask, + .receive_mask = EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK, .state_change_fn = sclp_tty_state_change, .receiver_fn = sclp_tty_receiver }; diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index f77dc33b5f8..726334757bb 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -99,8 +99,8 @@ static void sclp_vt220_emit_current(void); /* Registration structure for our interest in SCLP event buffers */ static struct sclp_register sclp_vt220_register = { - .send_mask = EvTyp_VT220Msg_Mask, - .receive_mask = EvTyp_VT220Msg_Mask, + .send_mask = EVTYP_VT220MSG_MASK, + .receive_mask = 
EVTYP_VT220MSG_MASK, .state_change_fn = NULL, .receiver_fn = sclp_vt220_receiver_fn }; @@ -202,7 +202,7 @@ sclp_vt220_callback(struct sclp_req *request, void *data) static int __sclp_vt220_emit(struct sclp_vt220_request *request) { - if (!(sclp_vt220_register.sclp_send_mask & EvTyp_VT220Msg_Mask)) { + if (!(sclp_vt220_register.sclp_send_mask & EVTYP_VT220MSG_MASK)) { request->sclp_req.status = SCLP_REQ_FAILED; return -EIO; } @@ -284,7 +284,7 @@ sclp_vt220_initialize_page(void *page) sccb->header.length = sizeof(struct sclp_vt220_sccb); sccb->header.function_code = SCLP_NORMAL_WRITE; sccb->header.response_code = 0x0000; - sccb->evbuf.type = EvTyp_VT220Msg; + sccb->evbuf.type = EVTYP_VT220MSG; sccb->evbuf.length = sizeof(struct evbuf_header); return request; diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index b87d3b01993..a5a00e9ae4d 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -125,7 +125,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .recording_name = "EREP", .minor_num = 0, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = __SPIN_LOCK_UNLOCKED(sys_ser[0].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -134,7 +134,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .recording_name = "ACCOUNT", .minor_num = 1, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = __SPIN_LOCK_UNLOCKED(sys_ser[1].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -143,7 +143,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .recording_name = "SYMPTOM", .minor_num = 2, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = __SPIN_LOCK_UNLOCKED(sys_ser[2].priv_lock), .autorecording = 1, .autopurge = 1, } @@ -385,6 +385,9 @@ static int vmlogrdr_release (struct inode *inode, struct file *filp) struct vmlogrdr_priv_t * logptr = filp->private_data; + iucv_path_sever(logptr->path, NULL); + kfree(logptr->path); + logptr->path = NULL; if (logptr->autorecording) { ret = vmlogrdr_recording(logptr,0,logptr->autopurge); if (ret) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c new file mode 100644 index 00000000000..89d439316a5 --- /dev/null +++ b/drivers/s390/char/zcore.c @@ -0,0 +1,651 @@ +/* + * zcore module to export memory content and register sets for creating system + * dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same + * dump format as s390 standalone dumps. + * + * For more information please refer to Documentation/s390/zfcpdump.txt + * + * Copyright IBM Corp. 2003,2007 + * Author(s): Michael Holzheu + */ + +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/utsname.h> +#include <linux/debugfs.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/setup.h> +#include <asm/sigp.h> +#include <asm/uaccess.h> +#include <asm/debug.h> +#include <asm/processor.h> +#include <asm/irqflags.h> + +#define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x) +#define MSG(x...) printk( KERN_ALERT x ) +#define ERROR_MSG(x...) 
printk ( KERN_ALERT "DUMP: " x ) + +#define TO_USER 0 +#define TO_KERNEL 1 + +enum arch_id { + ARCH_S390 = 0, + ARCH_S390X = 1, +}; + +/* dump system info */ + +struct sys_info { + enum arch_id arch; + unsigned long sa_base; + u32 sa_size; + int cpu_map[NR_CPUS]; + unsigned long mem_size; + union save_area lc_mask; +}; + +static struct sys_info sys_info; +static struct debug_info *zcore_dbf; +static int hsa_available; +static struct dentry *zcore_dir; +static struct dentry *zcore_file; + +/* + * Copy memory from HSA to kernel or user memory (not reentrant): + * + * @dest: Kernel or user buffer where memory should be copied to + * @src: Start address within HSA where data should be copied + * @count: Size of buffer, which should be copied + * @mode: Either TO_KERNEL or TO_USER + */ +static int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode) +{ + int offs, blk_num; + static char buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + + if (count == 0) + return 0; + + /* copy first block */ + offs = 0; + if ((src % PAGE_SIZE) != 0) { + blk_num = src / PAGE_SIZE + 2; + if (sclp_sdias_copy(buf, blk_num, 1)) { + TRACE("sclp_sdias_copy() failed\n"); + return -EIO; + } + offs = min((PAGE_SIZE - (src % PAGE_SIZE)), count); + if (mode == TO_USER) { + if (copy_to_user((__force __user void*) dest, + buf + (src % PAGE_SIZE), offs)) + return -EFAULT; + } else + memcpy(dest, buf + (src % PAGE_SIZE), offs); + } + if (offs == count) + goto out; + + /* copy middle */ + for (; (offs + PAGE_SIZE) <= count; offs += PAGE_SIZE) { + blk_num = (src + offs) / PAGE_SIZE + 2; + if (sclp_sdias_copy(buf, blk_num, 1)) { + TRACE("sclp_sdias_copy() failed\n"); + return -EIO; + } + if (mode == TO_USER) { + if (copy_to_user((__force __user void*) dest + offs, + buf, PAGE_SIZE)) + return -EFAULT; + } else + memcpy(dest + offs, buf, PAGE_SIZE); + } + if (offs == count) + goto out; + + /* copy last block */ + blk_num = (src + offs) / PAGE_SIZE + 2; + if (sclp_sdias_copy(buf, blk_num, 1)) { + TRACE("sclp_sdias_copy() failed\n"); + return -EIO; + } + if (mode == TO_USER) { + if (copy_to_user((__force __user void*) dest + offs, buf, + PAGE_SIZE)) + return -EFAULT; + } else + memcpy(dest + offs, buf, count - offs); +out: + return 0; +} + +static int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) +{ + return memcpy_hsa((void __force *) dest, src, count, TO_USER); +} + +static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) +{ + return memcpy_hsa(dest, src, count, TO_KERNEL); +} + +static int memcpy_real(void *dest, unsigned long src, size_t count) +{ + unsigned long flags; + int rc = -EFAULT; + register unsigned long _dest asm("2") = (unsigned long) dest; + register unsigned long _len1 asm("3") = (unsigned long) count; + register unsigned long _src asm("4") = src; + register unsigned long _len2 asm("5") = (unsigned long) count; + + if (count == 0) + return 0; + flags = __raw_local_irq_stnsm(0xf8); /* switch to real mode */ + asm volatile ( + "0: mvcle %1,%2,0x0\n" + "1: jo 0b\n" + " lhi %0,0x0\n" + "2:\n" + EX_TABLE(1b,2b) + : "+d" (rc) + : "d" (_dest), "d" (_src), "d" (_len1), "d" (_len2) + : "cc", "memory"); + __raw_local_irq_ssm(flags); + + return rc; +} + +static int memcpy_real_user(__user void *dest, unsigned long src, size_t count) +{ + static char buf[4096]; + int offs = 0, size; + + while (offs < count) { + size = min(sizeof(buf), count - offs); + if (memcpy_real(buf, src + offs, size)) + return -EFAULT; + if (copy_to_user(dest + offs, buf, size)) + return 
-EFAULT; + offs += size; + } + return 0; +} + +#ifdef __s390x__ +/* + * Convert s390x (64 bit) cpu info to s390 (32 bit) cpu info + */ +static void __init s390x_to_s390_regs(union save_area *out, union save_area *in, + int cpu) +{ + int i; + + for (i = 0; i < 16; i++) { + out->s390.gp_regs[i] = in->s390x.gp_regs[i] & 0x00000000ffffffff; + out->s390.acc_regs[i] = in->s390x.acc_regs[i]; + out->s390.ctrl_regs[i] = + in->s390x.ctrl_regs[i] & 0x00000000ffffffff; + } + /* lowcore for 31 bit has only space for fpregs 0,2,4,6 */ + out->s390.fp_regs[0] = in->s390x.fp_regs[0]; + out->s390.fp_regs[1] = in->s390x.fp_regs[2]; + out->s390.fp_regs[2] = in->s390x.fp_regs[4]; + out->s390.fp_regs[3] = in->s390x.fp_regs[6]; + memcpy(&(out->s390.psw[0]), &(in->s390x.psw[0]), 4); + out->s390.psw[1] |= 0x8; /* set bit 12 */ + memcpy(&(out->s390.psw[4]), &(in->s390x.psw[12]), 4); + out->s390.psw[4] |= 0x80; /* set (31bit) addressing bit */ + out->s390.pref_reg = in->s390x.pref_reg; + out->s390.timer = in->s390x.timer; + out->s390.clk_cmp = in->s390x.clk_cmp; +} + +static void __init s390x_to_s390_save_areas(void) +{ + int i = 1; + static union save_area tmp; + + while (zfcpdump_save_areas[i]) { + s390x_to_s390_regs(&tmp, zfcpdump_save_areas[i], i); + memcpy(zfcpdump_save_areas[i], &tmp, sizeof(tmp)); + i++; + } +} + +#endif /* __s390x__ */ + +static int __init init_cpu_info(enum arch_id arch) +{ + union save_area *sa; + + /* get info for boot cpu from lowcore, stored in the HSA */ + + sa = kmalloc(sizeof(*sa), GFP_KERNEL); + if (!sa) { + ERROR_MSG("kmalloc failed: %s: %i\n", __FUNCTION__, __LINE__); + return -ENOMEM; + } + if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) { + ERROR_MSG("could not copy from HSA\n"); + kfree(sa); + return -EIO; + } + zfcpdump_save_areas[0] = sa; + +#ifdef __s390x__ + /* convert s390x regs to s390, if we are dumping an s390 Linux */ + + if (arch == ARCH_S390) + s390x_to_s390_save_areas(); +#endif + + return 0; +} + +static DEFINE_MUTEX(zcore_mutex); + +#define DUMP_VERSION 0x3 +#define DUMP_MAGIC 0xa8190173618f23fdULL +#define DUMP_ARCH_S390X 2 +#define DUMP_ARCH_S390 1 +#define HEADER_SIZE 4096 + +/* dump header dumped according to s390 crash dump format */ + +struct zcore_header { + u64 magic; + u32 version; + u32 header_size; + u32 dump_level; + u32 page_size; + u64 mem_size; + u64 mem_start; + u64 mem_end; + u32 num_pages; + u32 pad1; + u64 tod; + cpuid_t cpu_id; + u32 arch_id; + u32 build_arch; + char pad2[4016]; +} __attribute__((packed,__aligned__(16))); + +static struct zcore_header zcore_header = { + .magic = DUMP_MAGIC, + .version = DUMP_VERSION, + .header_size = 4096, + .dump_level = 0, + .page_size = PAGE_SIZE, + .mem_start = 0, +#ifdef __s390x__ + .build_arch = DUMP_ARCH_S390X, +#else + .build_arch = DUMP_ARCH_S390, +#endif +}; + +/* + * Copy lowcore info to buffer. Use map in order to copy only register parts.
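+ * Only bytes whose offset is flagged in sys_info.lc_mask are copied, byte by
+ * byte, so register contents from the save area overlay the prefix page while
+ * all other bytes keep the values already read from dump memory.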
+ * + * @buf: User buffer + * @sa: Pointer to save area + * @sa_off: Offset in save area to copy + * @len: Number of bytes to copy + */ +static int copy_lc(void __user *buf, void *sa, int sa_off, int len) +{ + int i; + char *lc_mask = (char*)&sys_info.lc_mask; + + for (i = 0; i < len; i++) { + if (!lc_mask[i + sa_off]) + continue; + if (copy_to_user(buf + i, sa + sa_off + i, 1)) + return -EFAULT; + } + return 0; +} + +/* + * Copy lowcore info to memory, if necessary + * + * @buf: User buffer + * @start: Start address of buffer in dump memory + * @count: Size of buffer + */ +static int zcore_add_lc(char __user *buf, unsigned long start, size_t count) +{ + unsigned long end; + int i = 0; + + if (count == 0) + return 0; + + end = start + count; + while (zfcpdump_save_areas[i]) { + unsigned long cp_start, cp_end; /* copy range */ + unsigned long sa_start, sa_end; /* save area range */ + unsigned long prefix; + unsigned long sa_off, len, buf_off; + + if (sys_info.arch == ARCH_S390) + prefix = zfcpdump_save_areas[i]->s390.pref_reg; + else + prefix = zfcpdump_save_areas[i]->s390x.pref_reg; + + sa_start = prefix + sys_info.sa_base; + sa_end = prefix + sys_info.sa_base + sys_info.sa_size; + + if ((end < sa_start) || (start > sa_end)) + goto next; + cp_start = max(start, sa_start); + cp_end = min(end, sa_end); + + buf_off = cp_start - start; + sa_off = cp_start - sa_start; + len = cp_end - cp_start; + + TRACE("copy_lc for: %lx\n", start); + if (copy_lc(buf + buf_off, zfcpdump_save_areas[i], sa_off, len)) + return -EFAULT; +next: + i++; + } + return 0; +} + +/* + * Read routine for zcore character device + * First 4K are dump header + * Next 32MB are HSA Memory + * Rest is read from absolute Memory + */ +static ssize_t zcore_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + unsigned long mem_start; /* Start address in memory */ + size_t mem_offs; /* Offset in dump memory */ + size_t hdr_count; /* Size of header part of output buffer */ + size_t size; + int rc; + + mutex_lock(&zcore_mutex); + + if (*ppos > (sys_info.mem_size + HEADER_SIZE)) { + rc = -EINVAL; + goto fail; + } + + count = min(count, (size_t) (sys_info.mem_size + HEADER_SIZE - *ppos)); + + /* Copy dump header (byte-wise offset into the header struct) */ + if (*ppos < HEADER_SIZE) { + size = min(count, (size_t) (HEADER_SIZE - *ppos)); + if (copy_to_user(buf, (char *) &zcore_header + *ppos, size)) { + rc = -EFAULT; + goto fail; + } + hdr_count = size; + mem_start = 0; + } else { + hdr_count = 0; + mem_start = *ppos - HEADER_SIZE; + } + + mem_offs = 0; + + /* Copy from HSA data */ + if (*ppos < (ZFCPDUMP_HSA_SIZE + HEADER_SIZE)) { + size = min((count - hdr_count), (size_t) (ZFCPDUMP_HSA_SIZE + - mem_start)); + rc = memcpy_hsa_user(buf + hdr_count, mem_start, size); + if (rc) + goto fail; + + mem_offs += size; + } + + /* Copy from real mem */ + size = count - mem_offs - hdr_count; + rc = memcpy_real_user(buf + hdr_count + mem_offs, mem_start + mem_offs, + size); + if (rc) + goto fail; + + /* + * Since s390 dump analysis tools like lcrash or crash + * expect register sets in the prefix pages of the cpus, + * we copy them into the read buffer, if necessary. + * buf + hdr_count: Start of memory part of output buffer + * mem_start: Start memory address to copy from + * count - hdr_count: Size of memory area to copy + */ + if (zcore_add_lc(buf + hdr_count, mem_start, count - hdr_count)) { + rc = -EFAULT; + goto fail; + } + *ppos += count; +fail: + mutex_unlock(&zcore_mutex); + return (rc < 0) ?
rc : count; +} + +static int zcore_open(struct inode *inode, struct file *filp) +{ + if (!hsa_available) + return -ENODATA; + else + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; +} + +static int zcore_release(struct inode *inode, struct file *filep) +{ + diag308(DIAG308_REL_HSA, NULL); + hsa_available = 0; + return 0; +} + +static loff_t zcore_lseek(struct file *file, loff_t offset, int orig) +{ + loff_t rc; + + mutex_lock(&zcore_mutex); + switch (orig) { + case 0: + file->f_pos = offset; + rc = file->f_pos; + break; + case 1: + file->f_pos += offset; + rc = file->f_pos; + break; + default: + rc = -EINVAL; + } + mutex_unlock(&zcore_mutex); + return rc; +} + +static struct file_operations zcore_fops = { + .owner = THIS_MODULE, + .llseek = zcore_lseek, + .read = zcore_read, + .open = zcore_open, + .release = zcore_release, +}; + + +static void __init set_s390_lc_mask(union save_area *map) +{ + memset(&map->s390.ext_save, 0xff, sizeof(map->s390.ext_save)); + memset(&map->s390.timer, 0xff, sizeof(map->s390.timer)); + memset(&map->s390.clk_cmp, 0xff, sizeof(map->s390.clk_cmp)); + memset(&map->s390.psw, 0xff, sizeof(map->s390.psw)); + memset(&map->s390.pref_reg, 0xff, sizeof(map->s390.pref_reg)); + memset(&map->s390.acc_regs, 0xff, sizeof(map->s390.acc_regs)); + memset(&map->s390.fp_regs, 0xff, sizeof(map->s390.fp_regs)); + memset(&map->s390.gp_regs, 0xff, sizeof(map->s390.gp_regs)); + memset(&map->s390.ctrl_regs, 0xff, sizeof(map->s390.ctrl_regs)); +} + +static void __init set_s390x_lc_mask(union save_area *map) +{ + memset(&map->s390x.fp_regs, 0xff, sizeof(map->s390x.fp_regs)); + memset(&map->s390x.gp_regs, 0xff, sizeof(map->s390x.gp_regs)); + memset(&map->s390x.psw, 0xff, sizeof(map->s390x.psw)); + memset(&map->s390x.pref_reg, 0xff, sizeof(map->s390x.pref_reg)); + memset(&map->s390x.fp_ctrl_reg, 0xff, sizeof(map->s390x.fp_ctrl_reg)); + memset(&map->s390x.tod_reg, 0xff, sizeof(map->s390x.tod_reg)); + memset(&map->s390x.timer, 0xff, sizeof(map->s390x.timer)); + memset(&map->s390x.clk_cmp, 0xff, sizeof(map->s390x.clk_cmp)); + memset(&map->s390x.acc_regs, 0xff, sizeof(map->s390x.acc_regs)); + memset(&map->s390x.ctrl_regs, 0xff, sizeof(map->s390x.ctrl_regs)); +} + +/* + * Initialize dump globals for a given architecture + */ +static int __init sys_info_init(enum arch_id arch) +{ + switch (arch) { + case ARCH_S390X: + MSG("DETECTED 'S390X (64 bit) OS'\n"); + sys_info.sa_base = SAVE_AREA_BASE_S390X; + sys_info.sa_size = sizeof(struct save_area_s390x); + set_s390x_lc_mask(&sys_info.lc_mask); + break; + case ARCH_S390: + MSG("DETECTED 'S390 (32 bit) OS'\n"); + sys_info.sa_base = SAVE_AREA_BASE_S390; + sys_info.sa_size = sizeof(struct save_area_s390); + set_s390_lc_mask(&sys_info.lc_mask); + break; + default: + ERROR_MSG("unknown architecture 0x%x.\n",arch); + return -EINVAL; + } + sys_info.arch = arch; + if (init_cpu_info(arch)) { + ERROR_MSG("get cpu info failed\n"); + return -ENOMEM; + } + sys_info.mem_size = real_memory_size; + + return 0; +} + +static int __init check_sdias(void) +{ + int rc, act_hsa_size; + + rc = sclp_sdias_blk_count(); + if (rc < 0) { + ERROR_MSG("Could not determine HSA size\n"); + return rc; + } + act_hsa_size = (rc - 1) * PAGE_SIZE; + if (act_hsa_size < ZFCPDUMP_HSA_SIZE) { + ERROR_MSG("HSA size too small: %i\n", act_hsa_size); + return -EINVAL; + } + return 0; +} + +static void __init zcore_header_init(int arch, struct zcore_header *hdr) +{ + if (arch == ARCH_S390X) + hdr->arch_id = DUMP_ARCH_S390X; + else + hdr->arch_id = DUMP_ARCH_S390; + hdr->mem_size = sys_info.mem_size; 
+ hdr->mem_end = sys_info.mem_size; + hdr->num_pages = sys_info.mem_size / PAGE_SIZE; + hdr->tod = get_clock(); + get_cpu_id(&hdr->cpu_id); +} + +extern int sdias_init(void); + +static int __init zcore_init(void) +{ + unsigned char arch; + int rc; + + if (ipl_info.type != IPL_TYPE_FCP_DUMP) + return -ENODATA; + + zcore_dbf = debug_register("zcore", 4, 1, 4 * sizeof(long)); + debug_register_view(zcore_dbf, &debug_sprintf_view); + debug_set_level(zcore_dbf, 6); + + TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno); + TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn); + TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun); + + rc = sdias_init(); + if (rc) + goto fail; + + rc = check_sdias(); + if (rc) { + ERROR_MSG("Dump initialization failed\n"); + goto fail; + } + + rc = memcpy_hsa_kernel(&arch, __LC_AR_MODE_ID, 1); + if (rc) { + ERROR_MSG("sdial memcpy for arch id failed\n"); + goto fail; + } + +#ifndef __s390x__ + if (arch == ARCH_S390X) { + ERROR_MSG("32 bit dumper can't dump 64 bit system!\n"); + rc = -EINVAL; + goto fail; + } +#endif + + rc = sys_info_init(arch); + if (rc) { + ERROR_MSG("arch init failed\n"); + goto fail; + } + + zcore_header_init(arch, &zcore_header); + + zcore_dir = debugfs_create_dir("zcore" , NULL); + if (!zcore_dir) { + rc = -ENOMEM; + goto fail; + } + zcore_file = debugfs_create_file("mem", S_IRUSR, zcore_dir, NULL, + &zcore_fops); + if (!zcore_file) { + debugfs_remove(zcore_dir); + rc = -ENOMEM; + goto fail; + } + hsa_available = 1; + return 0; + +fail: + diag308(DIAG308_REL_HSA, NULL); + return rc; +} + +extern void sdias_exit(void); + +static void __exit zcore_exit(void) +{ + debug_unregister(zcore_dbf); + sdias_exit(); + diag308(DIAG308_REL_HSA, NULL); +} + +MODULE_AUTHOR("Copyright IBM Corp. 2003,2007"); +MODULE_DESCRIPTION("zcore module for zfcpdump support"); +MODULE_LICENSE("GPL"); + +subsys_initcall(zcore_init); +module_exit(zcore_exit); diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index c490c2a1c2f..cfaf77b320f 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -2,7 +2,7 @@ # Makefile for the S/390 common i/o drivers # -obj-y += airq.o blacklist.o chsc.o cio.o css.o +obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o ccw_device-objs += device.o device_fsm.o device_ops.o ccw_device-objs += device_id.o device_pgid.o device_status.o obj-y += ccw_device.o cmf.o diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 5aeb68e732b..e5ccda63e88 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -75,8 +75,10 @@ static void ccwgroup_ungroup_callback(struct device *dev) { struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + mutex_lock(&gdev->reg_mutex); __ccwgroup_remove_symlinks(gdev); device_unregister(dev); + mutex_unlock(&gdev->reg_mutex); } static ssize_t @@ -173,7 +175,8 @@ ccwgroup_create(struct device *root, return -ENOMEM; atomic_set(&gdev->onoff, 0); - + mutex_init(&gdev->reg_mutex); + mutex_lock(&gdev->reg_mutex); for (i = 0; i < argc; i++) { gdev->cdev[i] = get_ccwdev_by_busid(cdrv, argv[i]); @@ -183,12 +186,12 @@ ccwgroup_create(struct device *root, || gdev->cdev[i]->id.driver_info != gdev->cdev[0]->id.driver_info) { rc = -EINVAL; - goto free_dev; + goto error; } /* Don't allow a device to belong to more than one group. 
*/ if (gdev->cdev[i]->dev.driver_data) { rc = -EINVAL; - goto free_dev; + goto error; } gdev->cdev[i]->dev.driver_data = gdev; } @@ -203,9 +206,8 @@ ccwgroup_create(struct device *root, gdev->cdev[0]->dev.bus_id); rc = device_register(&gdev->dev); - if (rc) - goto free_dev; + goto error; get_device(&gdev->dev); rc = device_create_file(&gdev->dev, &dev_attr_ungroup); @@ -216,6 +218,7 @@ ccwgroup_create(struct device *root, rc = __ccwgroup_create_symlinks(gdev); if (!rc) { + mutex_unlock(&gdev->reg_mutex); put_device(&gdev->dev); return 0; } @@ -224,19 +227,12 @@ ccwgroup_create(struct device *root, error: for (i = 0; i < argc; i++) if (gdev->cdev[i]) { - put_device(&gdev->cdev[i]->dev); - gdev->cdev[i]->dev.driver_data = NULL; - } - put_device(&gdev->dev); - return rc; -free_dev: - for (i = 0; i < argc; i++) - if (gdev->cdev[i]) { if (gdev->cdev[i]->dev.driver_data == gdev) gdev->cdev[i]->dev.driver_data = NULL; put_device(&gdev->cdev[i]->dev); } - kfree(gdev); + mutex_unlock(&gdev->reg_mutex); + put_device(&gdev->dev); return rc; } @@ -422,8 +418,12 @@ ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver) get_driver(&cdriver->driver); while ((dev = driver_find_device(&cdriver->driver, NULL, NULL, __ccwgroup_match_all))) { - __ccwgroup_remove_symlinks(to_ccwgroupdev(dev)); + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + + mutex_lock(&gdev->reg_mutex); + __ccwgroup_remove_symlinks(gdev); device_unregister(dev); + mutex_unlock(&gdev->reg_mutex); put_device(dev); } put_driver(&cdriver->driver); @@ -444,8 +444,10 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) if (cdev->dev.driver_data) { gdev = (struct ccwgroup_device *)cdev->dev.driver_data; if (get_device(&gdev->dev)) { + mutex_lock(&gdev->reg_mutex); if (device_is_registered(&gdev->dev)) return gdev; + mutex_unlock(&gdev->reg_mutex); put_device(&gdev->dev); } return NULL; @@ -465,6 +467,7 @@ ccwgroup_remove_ccwdev(struct ccw_device *cdev) if (gdev) { __ccwgroup_remove_symlinks(gdev); device_unregister(&gdev->dev); + mutex_unlock(&gdev->reg_mutex); put_device(&gdev->dev); } } diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c new file mode 100644 index 00000000000..ac289e6eadf --- /dev/null +++ b/drivers/s390/cio/chp.c @@ -0,0 +1,683 @@ +/* + * drivers/s390/cio/chp.c + * + * Copyright IBM Corp. 1999,2007 + * Author(s): Cornelia Huck (cornelia.huck@de.ibm.com) + * Arnd Bergmann (arndb@de.ibm.com) + * Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#include <linux/bug.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/jiffies.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <asm/errno.h> +#include <asm/chpid.h> +#include <asm/sclp.h> + +#include "cio.h" +#include "css.h" +#include "ioasm.h" +#include "cio_debug.h" +#include "chp.h" + +#define to_channelpath(device) container_of(device, struct channel_path, dev) +#define CHP_INFO_UPDATE_INTERVAL 1*HZ + +enum cfg_task_t { + cfg_none, + cfg_configure, + cfg_deconfigure +}; + +/* Map for pending configure tasks. */ +static enum cfg_task_t chp_cfg_task[__MAX_CSSID + 1][__MAX_CHPID + 1]; +static DEFINE_MUTEX(cfg_lock); +static int cfg_busy; + +/* Map for channel-path status. */ +static struct sclp_chp_info chp_info; +static DEFINE_MUTEX(info_lock); + +/* Time after which channel-path status may be outdated. */ +static unsigned long chp_info_expires; + +/* Workqueue to perform pending configure tasks. 
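The chp_cfg_task map and single workqueue established here give "at most one pending action per chpid": a later request simply overwrites an earlier one, and one worker drains the map an entry at a time (see cfg_func() later in this file). A rough userspace analogue of that bookkeeping, with pthreads standing in for the workqueue; the names here are illustrative, not from the patch.

    #include <pthread.h>

    enum cfg_task { T_NONE, T_CONFIGURE, T_DECONFIGURE };

    #define N_CHPIDS 256
    static enum cfg_task task_map[N_CHPIDS];
    static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

    static void schedule_task(int chpid, enum cfg_task t)
    {
        pthread_mutex_lock(&map_lock);
        task_map[chpid] = t;    /* a newer request replaces an older one */
        pthread_mutex_unlock(&map_lock);
        /* ...then kick the single worker, as queue_work() does here... */
    }

    /* Worker side: fetch and clear one pending task, -1 if map is drained. */
    static int fetch_next_task(enum cfg_task *t)
    {
        int chpid;

        pthread_mutex_lock(&map_lock);
        for (chpid = 0; chpid < N_CHPIDS; chpid++) {
            if (task_map[chpid] != T_NONE) {
                *t = task_map[chpid];
                task_map[chpid] = T_NONE;
                pthread_mutex_unlock(&map_lock);
                return chpid;   /* one task per worker pass */
            }
        }
        pthread_mutex_unlock(&map_lock);
        return -1;
    }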
*/ +static struct workqueue_struct *chp_wq; +static struct work_struct cfg_work; + +/* Wait queue for configure completion events. */ +static wait_queue_head_t cfg_wait_queue; + +/* Return channel_path struct for given chpid. */ +static inline struct channel_path *chpid_to_chp(struct chp_id chpid) +{ + return css[chpid.cssid]->chps[chpid.id]; +} + +/* Set vary state for given chpid. */ +static void set_chp_logically_online(struct chp_id chpid, int onoff) +{ + chpid_to_chp(chpid)->state = onoff; +} + +/* On succes return 0 if channel-path is varied offline, 1 if it is varied + * online. Return -ENODEV if channel-path is not registered. */ +int chp_get_status(struct chp_id chpid) +{ + return (chpid_to_chp(chpid) ? chpid_to_chp(chpid)->state : -ENODEV); +} + +/** + * chp_get_sch_opm - return opm for subchannel + * @sch: subchannel + * + * Calculate and return the operational path mask (opm) based on the chpids + * used by the subchannel and the status of the associated channel-paths. + */ +u8 chp_get_sch_opm(struct subchannel *sch) +{ + struct chp_id chpid; + int opm; + int i; + + opm = 0; + chp_id_init(&chpid); + for (i=0; i < 8; i++) { + opm <<= 1; + chpid.id = sch->schib.pmcw.chpid[i]; + if (chp_get_status(chpid) != 0) + opm |= 1; + } + return opm; +} + +/** + * chp_is_registered - check if a channel-path is registered + * @chpid: channel-path ID + * + * Return non-zero if a channel-path with the given chpid is registered, + * zero otherwise. + */ +int chp_is_registered(struct chp_id chpid) +{ + return chpid_to_chp(chpid) != NULL; +} + +/* + * Function: s390_vary_chpid + * Varies the specified chpid online or offline + */ +static int s390_vary_chpid(struct chp_id chpid, int on) +{ + char dbf_text[15]; + int status; + + sprintf(dbf_text, on?"varyon%x.%02x":"varyoff%x.%02x", chpid.cssid, + chpid.id); + CIO_TRACE_EVENT( 2, dbf_text); + + status = chp_get_status(chpid); + if (status < 0) { + printk(KERN_ERR "Can't vary unknown chpid %x.%02x\n", + chpid.cssid, chpid.id); + return -EINVAL; + } + + if (!on && !status) { + printk(KERN_ERR "chpid %x.%02x is already offline\n", + chpid.cssid, chpid.id); + return -EINVAL; + } + + set_chp_logically_online(chpid, on); + chsc_chp_vary(chpid, on); + return 0; +} + +/* + * Channel measurement related functions + */ +static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf, + loff_t off, size_t count) +{ + struct channel_path *chp; + unsigned int size; + + chp = to_channelpath(container_of(kobj, struct device, kobj)); + if (!chp->cmg_chars) + return 0; + + size = sizeof(struct cmg_chars); + + if (off > size) + return 0; + if (off + count > size) + count = size - off; + memcpy(buf, chp->cmg_chars + off, count); + return count; +} + +static struct bin_attribute chp_measurement_chars_attr = { + .attr = { + .name = "measurement_chars", + .mode = S_IRUSR, + .owner = THIS_MODULE, + }, + .size = sizeof(struct cmg_chars), + .read = chp_measurement_chars_read, +}; + +static void chp_measurement_copy_block(struct cmg_entry *buf, + struct channel_subsystem *css, + struct chp_id chpid) +{ + void *area; + struct cmg_entry *entry, reference_buf; + int idx; + + if (chpid.id < 128) { + area = css->cub_addr1; + idx = chpid.id; + } else { + area = css->cub_addr2; + idx = chpid.id - 128; + } + entry = area + (idx * sizeof(struct cmg_entry)); + do { + memcpy(buf, entry, sizeof(*entry)); + memcpy(&reference_buf, entry, sizeof(*entry)); + } while (reference_buf.values[0] != buf->values[0]); +} + +static ssize_t chp_measurement_read(struct kobject *kobj, char 
*buf, + loff_t off, size_t count) +{ + struct channel_path *chp; + struct channel_subsystem *css; + unsigned int size; + + chp = to_channelpath(container_of(kobj, struct device, kobj)); + css = to_css(chp->dev.parent); + + size = sizeof(struct cmg_entry); + + /* Only allow single reads. */ + if (off || count < size) + return 0; + chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->chpid); + count = size; + return count; +} + +static struct bin_attribute chp_measurement_attr = { + .attr = { + .name = "measurement", + .mode = S_IRUSR, + .owner = THIS_MODULE, + }, + .size = sizeof(struct cmg_entry), + .read = chp_measurement_read, +}; + +void chp_remove_cmg_attr(struct channel_path *chp) +{ + device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr); + device_remove_bin_file(&chp->dev, &chp_measurement_attr); +} + +int chp_add_cmg_attr(struct channel_path *chp) +{ + int ret; + + ret = device_create_bin_file(&chp->dev, &chp_measurement_chars_attr); + if (ret) + return ret; + ret = device_create_bin_file(&chp->dev, &chp_measurement_attr); + if (ret) + device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr); + return ret; +} + +/* + * Files for the channel path entries. + */ +static ssize_t chp_status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct channel_path *chp = container_of(dev, struct channel_path, dev); + + if (!chp) + return 0; + return (chp_get_status(chp->chpid) ? sprintf(buf, "online\n") : + sprintf(buf, "offline\n")); +} + +static ssize_t chp_status_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct channel_path *cp = container_of(dev, struct channel_path, dev); + char cmd[10]; + int num_args; + int error; + + num_args = sscanf(buf, "%5s", cmd); + if (!num_args) + return count; + + if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1")) + error = s390_vary_chpid(cp->chpid, 1); + else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0")) + error = s390_vary_chpid(cp->chpid, 0); + else + error = -EINVAL; + + return error < 0 ? 
error : count; + +} + +static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write); + +static ssize_t chp_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct channel_path *cp; + int status; + + cp = container_of(dev, struct channel_path, dev); + status = chp_info_get_status(cp->chpid); + if (status < 0) + return status; + + return snprintf(buf, PAGE_SIZE, "%d\n", status); +} + +static int cfg_wait_idle(void); + +static ssize_t chp_configure_write(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct channel_path *cp; + int val; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + cp = container_of(dev, struct channel_path, dev); + chp_cfg_schedule(cp->chpid, val); + cfg_wait_idle(); + + return count; +} + +static DEVICE_ATTR(configure, 0644, chp_configure_show, chp_configure_write); + +static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct channel_path *chp = container_of(dev, struct channel_path, dev); + + if (!chp) + return 0; + return sprintf(buf, "%x\n", chp->desc.desc); +} + +static DEVICE_ATTR(type, 0444, chp_type_show, NULL); + +static ssize_t chp_cmg_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct channel_path *chp = to_channelpath(dev); + + if (!chp) + return 0; + if (chp->cmg == -1) /* channel measurements not available */ + return sprintf(buf, "unknown\n"); + return sprintf(buf, "%x\n", chp->cmg); +} + +static DEVICE_ATTR(cmg, 0444, chp_cmg_show, NULL); + +static ssize_t chp_shared_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct channel_path *chp = to_channelpath(dev); + + if (!chp) + return 0; + if (chp->shared == -1) /* channel measurements not available */ + return sprintf(buf, "unknown\n"); + return sprintf(buf, "%x\n", chp->shared); +} + +static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL); + +static struct attribute * chp_attrs[] = { + &dev_attr_status.attr, + &dev_attr_configure.attr, + &dev_attr_type.attr, + &dev_attr_cmg.attr, + &dev_attr_shared.attr, + NULL, +}; + +static struct attribute_group chp_attr_group = { + .attrs = chp_attrs, +}; + +static void chp_release(struct device *dev) +{ + struct channel_path *cp; + + cp = container_of(dev, struct channel_path, dev); + kfree(cp); +} + +/** + * chp_new - register a new channel-path + * @chpid - channel-path ID + * + * Create and register data structure representing new channel-path. Return + * zero on success, non-zero otherwise. + */ +int chp_new(struct chp_id chpid) +{ + struct channel_path *chp; + int ret; + + if (chp_is_registered(chpid)) + return 0; + chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); + if (!chp) + return -ENOMEM; + + /* fill in status, etc. */ + chp->chpid = chpid; + chp->state = 1; + chp->dev.parent = &css[chpid.cssid]->device; + chp->dev.release = chp_release; + snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp%x.%02x", chpid.cssid, + chpid.id); + + /* Obtain channel path description and fill it in. */ + ret = chsc_determine_channel_path_description(chpid, &chp->desc); + if (ret) + goto out_free; + if ((chp->desc.flags & 0x80) == 0) { + ret = -ENODEV; + goto out_free; + } + /* Get channel-measurement characteristics. 
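One detail worth spelling out from chp_measurement_copy_block() earlier in this file: the measurement block is updated concurrently by the machine, so the entry is copied twice and the copy retried until the first word of both copies agrees, treating that word as a consistency token (an assumption about the cmg_entry semantics). Stripped to its essence:

    #include <string.h>

    struct entry { unsigned int values[8]; };

    static void copy_consistent(struct entry *dst,
                                const volatile struct entry *src)
    {
        struct entry check;

        do {
            memcpy(dst, (const void *)src, sizeof(*dst));
            memcpy(&check, (const void *)src, sizeof(check));
        } while (check.values[0] != dst->values[0]);  /* torn read: retry */
    }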
*/ + if (css_characteristics_avail && css_chsc_characteristics.scmc + && css_chsc_characteristics.secm) { + ret = chsc_get_channel_measurement_chars(chp); + if (ret) + goto out_free; + } else { + static int msg_done; + + if (!msg_done) { + printk(KERN_WARNING "cio: Channel measurements not " + "available, continuing.\n"); + msg_done = 1; + } + chp->cmg = -1; + } + + /* make it known to the system */ + ret = device_register(&chp->dev); + if (ret) { + printk(KERN_WARNING "%s: could not register %x.%02x\n", + __func__, chpid.cssid, chpid.id); + goto out_free; + } + ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group); + if (ret) { + device_unregister(&chp->dev); + goto out_free; + } + mutex_lock(&css[chpid.cssid]->mutex); + if (css[chpid.cssid]->cm_enabled) { + ret = chp_add_cmg_attr(chp); + if (ret) { + sysfs_remove_group(&chp->dev.kobj, &chp_attr_group); + device_unregister(&chp->dev); + mutex_unlock(&css[chpid.cssid]->mutex); + goto out_free; + } + } + css[chpid.cssid]->chps[chpid.id] = chp; + mutex_unlock(&css[chpid.cssid]->mutex); + return ret; +out_free: + kfree(chp); + return ret; +} + +/** + * chp_get_chp_desc - return newly allocated channel-path description + * @chpid: channel-path ID + * + * On success return a newly allocated copy of the channel-path description + * data associated with the given channel-path ID. Return %NULL on error. + */ +void *chp_get_chp_desc(struct chp_id chpid) +{ + struct channel_path *chp; + struct channel_path_desc *desc; + + chp = chpid_to_chp(chpid); + if (!chp) + return NULL; + desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL); + if (!desc) + return NULL; + memcpy(desc, &chp->desc, sizeof(struct channel_path_desc)); + return desc; +} + +/** + * chp_process_crw - process channel-path status change + * @id: channel-path ID number + * @status: non-zero if channel-path has become available, zero otherwise + * + * Handle channel-report-words indicating that the status of a channel-path + * has changed. + */ +void chp_process_crw(int id, int status) +{ + struct chp_id chpid; + + chp_id_init(&chpid); + chpid.id = id; + if (status) { + if (!chp_is_registered(chpid)) + chp_new(chpid); + chsc_chp_online(chpid); + } else + chsc_chp_offline(chpid); +} + +static inline int info_bit_num(struct chp_id id) +{ + return id.id + id.cssid * (__MAX_CHPID + 1); +} + +/* Force chp_info refresh on next call to info_validate(). */ +static void info_expire(void) +{ + mutex_lock(&info_lock); + chp_info_expires = jiffies - 1; + mutex_unlock(&info_lock); +} + +/* Ensure that chp_info is up-to-date. */ +static int info_update(void) +{ + int rc; + + mutex_lock(&info_lock); + rc = 0; + if (time_after(jiffies, chp_info_expires)) { + /* Data is too old, update. */ + rc = sclp_chp_read_info(&chp_info); + chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL ; + } + mutex_unlock(&info_lock); + + return rc; +} + +/** + * chp_info_get_status - retrieve configure status of a channel-path + * @chpid: channel-path ID + * + * On success, return 0 for standby, 1 for configured, 2 for reserved, + * 3 for not recognized. Return negative error code on error. 
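The four status codes documented here correspond to the CHP_STATUS_* constants defined in chp.h later in this patch. A small illustrative helper (not from the patch) that a consumer of chp_info_get_status() might use to render them:

    static const char *chp_status_name(int status)
    {
        switch (status) {
        case 0:  return "standby";        /* CHP_STATUS_STANDBY */
        case 1:  return "configured";     /* CHP_STATUS_CONFIGURED */
        case 2:  return "reserved";       /* CHP_STATUS_RESERVED */
        case 3:  return "not recognized"; /* CHP_STATUS_NOT_RECOGNIZED */
        default: return "error";          /* negative errno from info_update() */
        }
    }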
+ */ +int chp_info_get_status(struct chp_id chpid) +{ + int rc; + int bit; + + rc = info_update(); + if (rc) + return rc; + + bit = info_bit_num(chpid); + mutex_lock(&info_lock); + if (!chp_test_bit(chp_info.recognized, bit)) + rc = CHP_STATUS_NOT_RECOGNIZED; + else if (chp_test_bit(chp_info.configured, bit)) + rc = CHP_STATUS_CONFIGURED; + else if (chp_test_bit(chp_info.standby, bit)) + rc = CHP_STATUS_STANDBY; + else + rc = CHP_STATUS_RESERVED; + mutex_unlock(&info_lock); + + return rc; +} + +/* Return configure task for chpid. */ +static enum cfg_task_t cfg_get_task(struct chp_id chpid) +{ + return chp_cfg_task[chpid.cssid][chpid.id]; +} + +/* Set configure task for chpid. */ +static void cfg_set_task(struct chp_id chpid, enum cfg_task_t cfg) +{ + chp_cfg_task[chpid.cssid][chpid.id] = cfg; +} + +/* Perform one configure/deconfigure request. Reschedule work function until + * last request. */ +static void cfg_func(struct work_struct *work) +{ + struct chp_id chpid; + enum cfg_task_t t; + + mutex_lock(&cfg_lock); + t = cfg_none; + chp_id_for_each(&chpid) { + t = cfg_get_task(chpid); + if (t != cfg_none) { + cfg_set_task(chpid, cfg_none); + break; + } + } + mutex_unlock(&cfg_lock); + + switch (t) { + case cfg_configure: + sclp_chp_configure(chpid); + info_expire(); + chsc_chp_online(chpid); + break; + case cfg_deconfigure: + sclp_chp_deconfigure(chpid); + info_expire(); + chsc_chp_offline(chpid); + break; + case cfg_none: + /* Get updated information after last change. */ + info_update(); + mutex_lock(&cfg_lock); + cfg_busy = 0; + mutex_unlock(&cfg_lock); + wake_up_interruptible(&cfg_wait_queue); + return; + } + queue_work(chp_wq, &cfg_work); +} + +/** + * chp_cfg_schedule - schedule chpid configuration request + * @chpid - channel-path ID + * @configure - Non-zero for configure, zero for deconfigure + * + * Schedule a channel-path configuration/deconfiguration request. + */ +void chp_cfg_schedule(struct chp_id chpid, int configure) +{ + CIO_MSG_EVENT(2, "chp_cfg_sched%x.%02x=%d\n", chpid.cssid, chpid.id, + configure); + mutex_lock(&cfg_lock); + cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure); + cfg_busy = 1; + mutex_unlock(&cfg_lock); + queue_work(chp_wq, &cfg_work); +} + +/** + * chp_cfg_cancel_deconfigure - cancel chpid deconfiguration request + * @chpid - channel-path ID + * + * Cancel an active channel-path deconfiguration request if it has not yet + * been performed. + */ +void chp_cfg_cancel_deconfigure(struct chp_id chpid) +{ + CIO_MSG_EVENT(2, "chp_cfg_cancel:%x.%02x\n", chpid.cssid, chpid.id); + mutex_lock(&cfg_lock); + if (cfg_get_task(chpid) == cfg_deconfigure) + cfg_set_task(chpid, cfg_none); + mutex_unlock(&cfg_lock); +} + +static int cfg_wait_idle(void) +{ + if (wait_event_interruptible(cfg_wait_queue, !cfg_busy)) + return -ERESTARTSYS; + return 0; +} + +static int __init chp_init(void) +{ + struct chp_id chpid; + + chp_wq = create_singlethread_workqueue("cio_chp"); + if (!chp_wq) + return -ENOMEM; + INIT_WORK(&cfg_work, cfg_func); + init_waitqueue_head(&cfg_wait_queue); + if (info_update()) + return 0; + /* Register available channel-paths. */ + chp_id_for_each(&chpid) { + if (chp_info_get_status(chpid) != CHP_STATUS_NOT_RECOGNIZED) + chp_new(chpid); + } + + return 0; +} + +subsys_initcall(chp_init); diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h new file mode 100644 index 00000000000..65286563c59 --- /dev/null +++ b/drivers/s390/cio/chp.h @@ -0,0 +1,53 @@ +/* + * drivers/s390/cio/chp.h + * + * Copyright IBM Corp. 
2007 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef S390_CHP_H +#define S390_CHP_H S390_CHP_H + +#include <linux/types.h> +#include <linux/device.h> +#include <asm/chpid.h> +#include "chsc.h" + +#define CHP_STATUS_STANDBY 0 +#define CHP_STATUS_CONFIGURED 1 +#define CHP_STATUS_RESERVED 2 +#define CHP_STATUS_NOT_RECOGNIZED 3 + +static inline int chp_test_bit(u8 *bitmap, int num) +{ + int byte = num >> 3; + int mask = 128 >> (num & 7); + + return (bitmap[byte] & mask) ? 1 : 0; +} + + +struct channel_path { + struct chp_id chpid; + int state; + struct channel_path_desc desc; + /* Channel-measurement related stuff: */ + int cmg; + int shared; + void *cmg_chars; + struct device dev; +}; + +int chp_get_status(struct chp_id chpid); +u8 chp_get_sch_opm(struct subchannel *sch); +int chp_is_registered(struct chp_id chpid); +void *chp_get_chp_desc(struct chp_id chpid); +void chp_process_crw(int id, int available); +void chp_remove_cmg_attr(struct channel_path *chp); +int chp_add_cmg_attr(struct channel_path *chp); +int chp_new(struct chp_id chpid); +void chp_cfg_schedule(struct chp_id chpid, int configure); +void chp_cfg_cancel_deconfigure(struct chp_id chpid); +int chp_info_get_status(struct chp_id chpid); + +#endif /* S390_CHP_H */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 6f05a44e381..ea92ac4d657 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -15,202 +15,124 @@ #include <linux/device.h> #include <asm/cio.h> +#include <asm/chpid.h> #include "css.h" #include "cio.h" #include "cio_debug.h" #include "ioasm.h" +#include "chp.h" #include "chsc.h" static void *sei_page; -static int new_channel_path(int chpid); - -static inline void -set_chp_logically_online(int chp, int onoff) -{ - css[0]->chps[chp]->state = onoff; -} - -static int -get_chp_status(int chp) -{ - return (css[0]->chps[chp] ? css[0]->chps[chp]->state : -ENODEV); -} - -void -chsc_validate_chpids(struct subchannel *sch) -{ - int mask, chp; - - for (chp = 0; chp <= 7; chp++) { - mask = 0x80 >> chp; - if (!get_chp_status(sch->schib.pmcw.chpid[chp])) - /* disable using this path */ - sch->opm &= ~mask; - } -} - -void -chpid_is_actually_online(int chp) -{ - int state; - - state = get_chp_status(chp); - if (state < 0) { - need_rescan = 1; - queue_work(slow_path_wq, &slow_path_work); - } else - WARN_ON(!state); -} +struct chsc_ssd_area { + struct chsc_header request; + u16 :10; + u16 ssid:2; + u16 :4; + u16 f_sch; /* first subchannel */ + u16 :16; + u16 l_sch; /* last subchannel */ + u32 :32; + struct chsc_header response; + u32 :32; + u8 sch_valid : 1; + u8 dev_valid : 1; + u8 st : 3; /* subchannel type */ + u8 zeroes : 3; + u8 unit_addr; /* unit address */ + u16 devno; /* device number */ + u8 path_mask; + u8 fla_valid_mask; + u16 sch; /* subchannel */ + u8 chpid[8]; /* chpids 0-7 */ + u16 fla[8]; /* full link addresses 0-7 */ +} __attribute__ ((packed)); -/* FIXME: this is _always_ called for every subchannel. shouldn't we - * process more than one at a time? 
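chp_test_bit() in chp.h above numbers bits MSB-first within each byte, matching the layout of the SCLP channel-path bitmaps indexed through info_bit_num() earlier. A self-contained check of that convention (standalone C, not from the patch):

    #include <assert.h>

    static int chp_test_bit(unsigned char *bitmap, int num)
    {
        int byte = num >> 3;
        int mask = 128 >> (num & 7);

        return (bitmap[byte] & mask) ? 1 : 0;
    }

    int main(void)
    {
        unsigned char map[2] = { 0x80, 0x01 };

        assert(chp_test_bit(map, 0) == 1);   /* MSB of byte 0 */
        assert(chp_test_bit(map, 1) == 0);
        assert(chp_test_bit(map, 15) == 1);  /* LSB of byte 1 */
        return 0;
    }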
*/ -static int -chsc_get_sch_desc_irq(struct subchannel *sch, void *page) +int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { - int ccode, j; - - struct { - struct chsc_header request; - u16 reserved1a:10; - u16 ssid:2; - u16 reserved1b:4; - u16 f_sch; /* first subchannel */ - u16 reserved2; - u16 l_sch; /* last subchannel */ - u32 reserved3; - struct chsc_header response; - u32 reserved4; - u8 sch_valid : 1; - u8 dev_valid : 1; - u8 st : 3; /* subchannel type */ - u8 zeroes : 3; - u8 unit_addr; /* unit address */ - u16 devno; /* device number */ - u8 path_mask; - u8 fla_valid_mask; - u16 sch; /* subchannel */ - u8 chpid[8]; /* chpids 0-7 */ - u16 fla[8]; /* full link addresses 0-7 */ - } __attribute__ ((packed)) *ssd_area; - - ssd_area = page; + unsigned long page; + struct chsc_ssd_area *ssd_area; + int ccode; + int ret; + int i; + int mask; + page = get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!page) + return -ENOMEM; + ssd_area = (struct chsc_ssd_area *) page; ssd_area->request.length = 0x0010; ssd_area->request.code = 0x0004; - - ssd_area->ssid = sch->schid.ssid; - ssd_area->f_sch = sch->schid.sch_no; - ssd_area->l_sch = sch->schid.sch_no; + ssd_area->ssid = schid.ssid; + ssd_area->f_sch = schid.sch_no; + ssd_area->l_sch = schid.sch_no; ccode = chsc(ssd_area); + /* Check response. */ if (ccode > 0) { - pr_debug("chsc returned with ccode = %d\n", ccode); - return (ccode == 3) ? -ENODEV : -EBUSY; + ret = (ccode == 3) ? -ENODEV : -EBUSY; + goto out_free; } - - switch (ssd_area->response.code) { - case 0x0001: /* everything ok */ - break; - case 0x0002: - CIO_CRW_EVENT(2, "Invalid command!\n"); - return -EINVAL; - case 0x0003: - CIO_CRW_EVENT(2, "Error in chsc request block!\n"); - return -EINVAL; - case 0x0004: - CIO_CRW_EVENT(2, "Model does not provide ssd\n"); - return -EOPNOTSUPP; - default: - CIO_CRW_EVENT(2, "Unknown CHSC response %d\n", + if (ssd_area->response.code != 0x0001) { + CIO_MSG_EVENT(2, "chsc: ssd failed for 0.%x.%04x (rc=%04x)\n", + schid.ssid, schid.sch_no, ssd_area->response.code); - return -EIO; + ret = -EIO; + goto out_free; } - - /* - * ssd_area->st stores the type of the detected - * subchannel, with the following definitions: - * - * 0: I/O subchannel: All fields have meaning - * 1: CHSC subchannel: Only sch_val, st and sch - * have meaning - * 2: Message subchannel: All fields except unit_addr - * have meaning - * 3: ADM subchannel: Only sch_val, st and sch - * have meaning - * - * Other types are currently undefined. - */ - if (ssd_area->st > 3) { /* uhm, that looks strange... 
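The rewritten chsc_get_ssd_info() above shows the call pattern that recurs for every CHSC command in this file: request and response share one zeroed DMA page, a condition code above zero from the chsc instruction means busy or not operational, and only response code 0x0001 indicates success. Condensed into a helper (a sketch limited to the cases handled here):

    #include <stdint.h>
    #include <errno.h>

    static int chsc_rc_to_errno(int cc, uint16_t response_code)
    {
        if (cc > 0)                      /* instruction did not complete */
            return (cc == 3) ? -ENODEV : -EBUSY;
        if (response_code != 0x0001)     /* command rejected or failed */
            return -EIO;
        return 0;
    }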
*/ - CIO_CRW_EVENT(0, "Strange subchannel type %d" - " for sch 0.%x.%04x\n", ssd_area->st, - sch->schid.ssid, sch->schid.sch_no); - /* - * There may have been a new subchannel type defined in the - * time since this code was written; since we don't know which - * fields have meaning and what to do with it we just jump out - */ - return 0; - } else { - const char *type[4] = {"I/O", "chsc", "message", "ADM"}; - CIO_CRW_EVENT(6, "ssd: sch 0.%x.%04x is %s subchannel\n", - sch->schid.ssid, sch->schid.sch_no, - type[ssd_area->st]); - - sch->ssd_info.valid = 1; - sch->ssd_info.type = ssd_area->st; + if (!ssd_area->sch_valid) { + ret = -ENODEV; + goto out_free; } - - if (ssd_area->st == 0 || ssd_area->st == 2) { - for (j = 0; j < 8; j++) { - if (!((0x80 >> j) & ssd_area->path_mask & - ssd_area->fla_valid_mask)) - continue; - sch->ssd_info.chpid[j] = ssd_area->chpid[j]; - sch->ssd_info.fla[j] = ssd_area->fla[j]; + /* Copy data */ + ret = 0; + memset(ssd, 0, sizeof(struct chsc_ssd_info)); + if ((ssd_area->st != 0) && (ssd_area->st != 2)) + goto out_free; + ssd->path_mask = ssd_area->path_mask; + ssd->fla_valid_mask = ssd_area->fla_valid_mask; + for (i = 0; i < 8; i++) { + mask = 0x80 >> i; + if (ssd_area->path_mask & mask) { + chp_id_init(&ssd->chpid[i]); + ssd->chpid[i].id = ssd_area->chpid[i]; } + if (ssd_area->fla_valid_mask & mask) + ssd->fla[i] = ssd_area->fla[i]; } - return 0; +out_free: + free_page(page); + return ret; } -int -css_get_ssd_info(struct subchannel *sch) +static int check_for_io_on_path(struct subchannel *sch, int mask) { - int ret; - void *page; + int cc; - page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!page) - return -ENOMEM; - spin_lock_irq(sch->lock); - ret = chsc_get_sch_desc_irq(sch, page); - if (ret) { - static int cio_chsc_err_msg; - - if (!cio_chsc_err_msg) { - printk(KERN_ERR - "chsc_get_sch_descriptions:" - " Error %d while doing chsc; " - "processing some machine checks may " - "not work\n", ret); - cio_chsc_err_msg = 1; - } - } - spin_unlock_irq(sch->lock); - free_page((unsigned long)page); - if (!ret) { - int j, chpid, mask; - /* Allocate channel path structures, if needed. */ - for (j = 0; j < 8; j++) { - mask = 0x80 >> j; - chpid = sch->ssd_info.chpid[j]; - if ((sch->schib.pmcw.pim & mask) && - (get_chp_status(chpid) < 0)) - new_channel_path(chpid); - } + cc = stsch(sch->schid, &sch->schib); + if (cc) + return 0; + if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == mask) + return 1; + return 0; +} + +static void terminate_internal_io(struct subchannel *sch) +{ + if (cio_clear(sch)) { + /* Recheck device in case clear failed. */ + sch->lpm = 0; + if (device_trigger_verify(sch) != 0) + css_schedule_eval(sch->schid); + return; } - return ret; + /* Request retry of internal operation. */ + device_set_intretry(sch); + /* Call handler. 
*/ + if (sch->driver && sch->driver->termination) + sch->driver->termination(&sch->dev); } static int @@ -219,7 +141,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) int j; int mask; struct subchannel *sch; - struct channel_path *chpid; + struct chp_id *chpid; struct schib schib; sch = to_subchannel(dev); @@ -243,106 +165,50 @@ s390_subchannel_remove_chpid(struct device *dev, void *data) if (sch->schib.pmcw.pim == 0x80) goto out_unreg; - if ((sch->schib.scsw.actl & SCSW_ACTL_DEVACT) && - (sch->schib.scsw.actl & SCSW_ACTL_SCHACT) && - (sch->schib.pmcw.lpum == mask)) { - int cc; - - cc = cio_clear(sch); - if (cc == -ENODEV) + if (check_for_io_on_path(sch, mask)) { + if (device_is_online(sch)) + device_kill_io(sch); + else { + terminate_internal_io(sch); + /* Re-start path verification. */ + if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); + } + } else { + /* trigger path verification. */ + if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); + else if (sch->lpm == mask) goto out_unreg; - /* Request retry of internal operation. */ - device_set_intretry(sch); - /* Call handler. */ - if (sch->driver && sch->driver->termination) - sch->driver->termination(&sch->dev); - goto out_unlock; } - /* trigger path verification. */ - if (sch->driver && sch->driver->verify) - sch->driver->verify(&sch->dev); - else if (sch->lpm == mask) - goto out_unreg; -out_unlock: spin_unlock_irq(sch->lock); return 0; + out_unreg: - spin_unlock_irq(sch->lock); sch->lpm = 0; - if (css_enqueue_subchannel_slow(sch->schid)) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } + spin_unlock_irq(sch->lock); + css_schedule_eval(sch->schid); return 0; } -static void -s390_set_chpid_offline( __u8 chpid) +void chsc_chp_offline(struct chp_id chpid) { char dbf_txt[15]; - struct device *dev; - sprintf(dbf_txt, "chpr%x", chpid); + sprintf(dbf_txt, "chpr%x.%02x", chpid.cssid, chpid.id); CIO_TRACE_EVENT(2, dbf_txt); - if (get_chp_status(chpid) <= 0) + if (chp_get_status(chpid) <= 0) return; - dev = get_device(&css[0]->chps[chpid]->dev); - bus_for_each_dev(&css_bus_type, NULL, to_channelpath(dev), + bus_for_each_dev(&css_bus_type, NULL, &chpid, s390_subchannel_remove_chpid); - - if (need_rescan || css_slow_subchannels_exist()) - queue_work(slow_path_wq, &slow_path_work); - put_device(dev); -} - -struct res_acc_data { - struct channel_path *chp; - u32 fla_mask; - u16 fla; -}; - -static int -s390_process_res_acc_sch(struct res_acc_data *res_data, struct subchannel *sch) -{ - int found; - int chp; - int ccode; - - found = 0; - for (chp = 0; chp <= 7; chp++) - /* - * check if chpid is in information updated by ssd - */ - if (sch->ssd_info.valid && - sch->ssd_info.chpid[chp] == res_data->chp->id && - (sch->ssd_info.fla[chp] & res_data->fla_mask) - == res_data->fla) { - found = 1; - break; - } - - if (found == 0) - return 0; - - /* - * Do a stsch to update our subchannel structure with the - * new path information and eventually check for logically - * offline chpids. - */ - ccode = stsch(sch->schid, &sch->schib); - if (ccode > 0) - return 0; - - return 0x80 >> chp; } static int s390_process_res_acc_new_sch(struct subchannel_id schid) { struct schib schib; - int ret; /* * We don't know the device yet, but since a path * may be available now to the device we'll have @@ -353,14 +219,35 @@ s390_process_res_acc_new_sch(struct subchannel_id schid) */ if (stsch_err(schid, &schib)) /* We're through */ - return need_rescan ? 
-EAGAIN : -ENXIO; + return -ENXIO; /* Put it on the slow path. */ - ret = css_enqueue_subchannel_slow(schid); - if (ret) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - return -EAGAIN; + css_schedule_eval(schid); + return 0; +} + +struct res_acc_data { + struct chp_id chpid; + u32 fla_mask; + u16 fla; +}; + +static int get_res_chpid_mask(struct chsc_ssd_info *ssd, + struct res_acc_data *data) +{ + int i; + int mask; + + for (i = 0; i < 8; i++) { + mask = 0x80 >> i; + if (!(ssd->path_mask & mask)) + continue; + if (!chp_id_is_equal(&ssd->chpid[i], &data->chpid)) + continue; + if ((ssd->fla_valid_mask & mask) && + ((ssd->fla[i] & data->fla_mask) != data->fla)) + continue; + return mask; } return 0; } @@ -379,14 +266,11 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) return s390_process_res_acc_new_sch(schid); spin_lock_irq(sch->lock); - - chp_mask = s390_process_res_acc_sch(res_data, sch); - - if (chp_mask == 0) { - spin_unlock_irq(sch->lock); - put_device(&sch->dev); - return 0; - } + chp_mask = get_res_chpid_mask(&sch->ssd_info, res_data); + if (chp_mask == 0) + goto out; + if (stsch(sch->schid, &sch->schib)) + goto out; old_lpm = sch->lpm; sch->lpm = ((sch->schib.pmcw.pim & sch->schib.pmcw.pam & @@ -396,20 +280,18 @@ __s390_process_res_acc(struct subchannel_id schid, void *data) device_trigger_reprobe(sch); else if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); - +out: spin_unlock_irq(sch->lock); put_device(&sch->dev); return 0; } - -static int -s390_process_res_acc (struct res_acc_data *res_data) +static void s390_process_res_acc (struct res_acc_data *res_data) { - int rc; char dbf_txt[15]; - sprintf(dbf_txt, "accpr%x", res_data->chp->id); + sprintf(dbf_txt, "accpr%x.%02x", res_data->chpid.cssid, + res_data->chpid.id); CIO_TRACE_EVENT( 2, dbf_txt); if (res_data->fla != 0) { sprintf(dbf_txt, "fla%x", res_data->fla); @@ -423,12 +305,7 @@ s390_process_res_acc (struct res_acc_data *res_data) * The more information we have (info), the less scanning * will we have to do. 
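In get_res_chpid_mask() above, the fla/fla_mask pair encodes how specific the resource-accessibility event was: no link address at all, a link address only, or a full link address. The comparison reduces to the following illustrative helper, which mirrors the logic rather than being taken from the patch:

    #include <stdint.h>

    /* fla_mask 0x0000: event carried no link address, every path matches
     * fla_mask 0xff00: compare the link address (high byte) only
     * fla_mask 0xffff: compare the full link address */
    static int fla_matches(uint16_t stored_fla, uint16_t event_fla,
                           uint16_t fla_mask)
    {
        return (stored_fla & fla_mask) == event_fla;
    }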
*/ - rc = for_each_subchannel(__s390_process_res_acc, res_data); - if (css_slow_subchannels_exist()) - rc = -EAGAIN; - else if (rc != -EAGAIN) - rc = 0; - return rc; + for_each_subchannel(__s390_process_res_acc, res_data); } static int @@ -480,43 +357,45 @@ struct chsc_sei_area { /* ccdf has to be big enough for a link-incident record */ } __attribute__ ((packed)); -static int chsc_process_sei_link_incident(struct chsc_sei_area *sei_area) +static void chsc_process_sei_link_incident(struct chsc_sei_area *sei_area) { - int chpid; + struct chp_id chpid; + int id; CIO_CRW_EVENT(4, "chsc: link incident (rs=%02x, rs_id=%04x)\n", sei_area->rs, sei_area->rsid); if (sei_area->rs != 4) - return 0; - chpid = __get_chpid_from_lir(sei_area->ccdf); - if (chpid < 0) + return; + id = __get_chpid_from_lir(sei_area->ccdf); + if (id < 0) CIO_CRW_EVENT(4, "chsc: link incident - invalid LIR\n"); - else - s390_set_chpid_offline(chpid); - - return 0; + else { + chp_id_init(&chpid); + chpid.id = id; + chsc_chp_offline(chpid); + } } -static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) +static void chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) { struct res_acc_data res_data; - struct device *dev; + struct chp_id chpid; int status; - int rc; CIO_CRW_EVENT(4, "chsc: resource accessibility event (rs=%02x, " "rs_id=%04x)\n", sei_area->rs, sei_area->rsid); if (sei_area->rs != 4) - return 0; + return; + chp_id_init(&chpid); + chpid.id = sei_area->rsid; /* allocate a new channel path structure, if needed */ - status = get_chp_status(sei_area->rsid); + status = chp_get_status(chpid); if (status < 0) - new_channel_path(sei_area->rsid); + chp_new(chpid); else if (!status) - return 0; - dev = get_device(&css[0]->chps[sei_area->rsid]->dev); + return; memset(&res_data, 0, sizeof(struct res_acc_data)); - res_data.chp = to_channelpath(dev); + res_data.chpid = chpid; if ((sei_area->vf & 0xc0) != 0) { res_data.fla = sei_area->fla; if ((sei_area->vf & 0xc0) == 0xc0) @@ -526,51 +405,82 @@ static int chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) /* link address */ res_data.fla_mask = 0xff00; } - rc = s390_process_res_acc(&res_data); - put_device(dev); - - return rc; + s390_process_res_acc(&res_data); } -static int chsc_process_sei(struct chsc_sei_area *sei_area) +struct chp_config_data { + u8 map[32]; + u8 op; + u8 pc; +}; + +static void chsc_process_sei_chp_config(struct chsc_sei_area *sei_area) { - int rc; + struct chp_config_data *data; + struct chp_id chpid; + int num; + + CIO_CRW_EVENT(4, "chsc: channel-path-configuration notification\n"); + if (sei_area->rs != 0) + return; + data = (struct chp_config_data *) &(sei_area->ccdf); + chp_id_init(&chpid); + for (num = 0; num <= __MAX_CHPID; num++) { + if (!chp_test_bit(data->map, num)) + continue; + chpid.id = num; + printk(KERN_WARNING "cio: processing configure event %d for " + "chpid %x.%02x\n", data->op, chpid.cssid, chpid.id); + switch (data->op) { + case 0: + chp_cfg_schedule(chpid, 1); + break; + case 1: + chp_cfg_schedule(chpid, 0); + break; + case 2: + chp_cfg_cancel_deconfigure(chpid); + break; + } + } +} +static void chsc_process_sei(struct chsc_sei_area *sei_area) +{ /* Check if we might have lost some information. */ - if (sei_area->flags & 0x40) + if (sei_area->flags & 0x40) { CIO_CRW_EVENT(2, "chsc: event overflow\n"); + css_schedule_eval_all(); + } /* which kind of information was stored? 
*/ - rc = 0; switch (sei_area->cc) { case 1: /* link incident*/ - rc = chsc_process_sei_link_incident(sei_area); + chsc_process_sei_link_incident(sei_area); break; case 2: /* i/o resource accessibiliy */ - rc = chsc_process_sei_res_acc(sei_area); + chsc_process_sei_res_acc(sei_area); + break; + case 8: /* channel-path-configuration notification */ + chsc_process_sei_chp_config(sei_area); break; default: /* other stuff */ CIO_CRW_EVENT(4, "chsc: unhandled sei content code %d\n", sei_area->cc); break; } - - return rc; } -int chsc_process_crw(void) +void chsc_process_crw(void) { struct chsc_sei_area *sei_area; - int ret; - int rc; if (!sei_page) - return 0; + return; /* Access to sei_page is serialized through machine check handler * thread, so no need for locking. */ sei_area = sei_page; CIO_TRACE_EVENT( 2, "prcss"); - ret = 0; do { memset(sei_area, 0, sizeof(*sei_area)); sei_area->request.length = 0x0010; @@ -580,37 +490,26 @@ int chsc_process_crw(void) if (sei_area->response.code == 0x0001) { CIO_CRW_EVENT(4, "chsc: sei successful\n"); - rc = chsc_process_sei(sei_area); - if (rc) - ret = rc; + chsc_process_sei(sei_area); } else { CIO_CRW_EVENT(2, "chsc: sei failed (rc=%04x)\n", sei_area->response.code); - ret = 0; break; } } while (sei_area->flags & 0x80); - - return ret; } static int __chp_add_new_sch(struct subchannel_id schid) { struct schib schib; - int ret; if (stsch_err(schid, &schib)) /* We're through */ - return need_rescan ? -EAGAIN : -ENXIO; + return -ENXIO; /* Put it on the slow path. */ - ret = css_enqueue_subchannel_slow(schid); - if (ret) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - return -EAGAIN; - } + css_schedule_eval(schid); return 0; } @@ -619,10 +518,10 @@ static int __chp_add(struct subchannel_id schid, void *data) { int i, mask; - struct channel_path *chp; + struct chp_id *chpid; struct subchannel *sch; - chp = data; + chpid = data; sch = get_subchannel_by_schid(schid); if (!sch) /* Check if the subchannel is now available. */ @@ -631,7 +530,7 @@ __chp_add(struct subchannel_id schid, void *data) for (i=0; i<8; i++) { mask = 0x80 >> i; if ((sch->schib.pmcw.pim & mask) && - (sch->schib.pmcw.chpid[i] == chp->id)) { + (sch->schib.pmcw.chpid[i] == chpid->id)) { if (stsch(sch->schid, &sch->schib) != 0) { /* Endgame. */ spin_unlock_irq(sch->lock); @@ -657,122 +556,58 @@ __chp_add(struct subchannel_id schid, void *data) return 0; } -static int -chp_add(int chpid) +void chsc_chp_online(struct chp_id chpid) { - int rc; char dbf_txt[15]; - struct device *dev; - if (!get_chp_status(chpid)) - return 0; /* no need to do the rest */ - - sprintf(dbf_txt, "cadd%x", chpid); + sprintf(dbf_txt, "cadd%x.%02x", chpid.cssid, chpid.id); CIO_TRACE_EVENT(2, dbf_txt); - dev = get_device(&css[0]->chps[chpid]->dev); - rc = for_each_subchannel(__chp_add, to_channelpath(dev)); - if (css_slow_subchannels_exist()) - rc = -EAGAIN; - if (rc != -EAGAIN) - rc = 0; - put_device(dev); - return rc; + if (chp_get_status(chpid) != 0) + for_each_subchannel(__chp_add, &chpid); } -/* - * Handling of crw machine checks with channel path source. - */ -int -chp_process_crw(int chpid, int on) -{ - if (on == 0) { - /* Path has gone. We use the link incident routine.*/ - s390_set_chpid_offline(chpid); - return 0; /* De-register is async anyway. */ - } - /* - * Path has come. Allocate a new channel path structure, - * if needed. - */ - if (get_chp_status(chpid) < 0) - new_channel_path(chpid); - /* Avoid the extra overhead in process_rec_acc. 
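chsc_process_crw() above drains the machine's event queue rather than fetching a single record: each store-event-information response sets flag bit 0x80 while further events are pending, and the loop also stops on the first failed call. The same control flow as a self-contained example with a fake event source; all names here are illustrative.

    #include <stdint.h>
    #include <stdio.h>

    struct sei { uint8_t flags; uint8_t cc; };

    /* Stand-in for the CHSC store-event call: two queued events. */
    static struct sei fake[] = { { 0x80, 1 }, { 0x00, 2 } };
    static unsigned int next;

    static int store_event(struct sei *ev)
    {
        if (next >= sizeof(fake) / sizeof(fake[0]))
            return -1;              /* would be a bad response code */
        *ev = fake[next++];
        return 0;
    }

    int main(void)
    {
        struct sei ev;

        do {
            if (store_event(&ev))
                break;              /* sei failed: stop draining */
            printf("content code %u\n", ev.cc);
        } while (ev.flags & 0x80);  /* 0x80: more events pending */
        return 0;
    }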
*/ - return chp_add(chpid); -} - -static int check_for_io_on_path(struct subchannel *sch, int index) -{ - int cc; - - cc = stsch(sch->schid, &sch->schib); - if (cc) - return 0; - if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) - return 1; - return 0; -} - -static void terminate_internal_io(struct subchannel *sch) -{ - if (cio_clear(sch)) { - /* Recheck device in case clear failed. */ - sch->lpm = 0; - if (device_trigger_verify(sch) != 0) { - if(css_enqueue_subchannel_slow(sch->schid)) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } - } - return; - } - /* Request retry of internal operation. */ - device_set_intretry(sch); - /* Call handler. */ - if (sch->driver && sch->driver->termination) - sch->driver->termination(&sch->dev); -} - -static void -__s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) +static void __s390_subchannel_vary_chpid(struct subchannel *sch, + struct chp_id chpid, int on) { int chp, old_lpm; + int mask; unsigned long flags; - if (!sch->ssd_info.valid) - return; - spin_lock_irqsave(sch->lock, flags); old_lpm = sch->lpm; for (chp = 0; chp < 8; chp++) { - if (sch->ssd_info.chpid[chp] != chpid) + mask = 0x80 >> chp; + if (!(sch->ssd_info.path_mask & mask)) + continue; + if (!chp_id_is_equal(&sch->ssd_info.chpid[chp], &chpid)) continue; if (on) { - sch->opm |= (0x80 >> chp); - sch->lpm |= (0x80 >> chp); + sch->opm |= mask; + sch->lpm |= mask; if (!old_lpm) device_trigger_reprobe(sch); else if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); break; } - sch->opm &= ~(0x80 >> chp); - sch->lpm &= ~(0x80 >> chp); - if (check_for_io_on_path(sch, chp)) { + sch->opm &= ~mask; + sch->lpm &= ~mask; + if (check_for_io_on_path(sch, mask)) { if (device_is_online(sch)) /* Path verification is done after killing. */ device_kill_io(sch); - else + else { /* Kill and retry internal I/O. */ terminate_internal_io(sch); - } else if (!sch->lpm) { - if (device_trigger_verify(sch) != 0) { - if (css_enqueue_subchannel_slow(sch->schid)) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } + /* Re-start path verification. */ + if (sch->driver && sch->driver->verify) + sch->driver->verify(&sch->dev); } + } else if (!sch->lpm) { + if (device_trigger_verify(sch) != 0) + css_schedule_eval(sch->schid); } else if (sch->driver && sch->driver->verify) sch->driver->verify(&sch->dev); break; @@ -780,11 +615,10 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on) spin_unlock_irqrestore(sch->lock, flags); } -static int -s390_subchannel_vary_chpid_off(struct device *dev, void *data) +static int s390_subchannel_vary_chpid_off(struct device *dev, void *data) { struct subchannel *sch; - __u8 *chpid; + struct chp_id *chpid; sch = to_subchannel(dev); chpid = data; @@ -793,11 +627,10 @@ s390_subchannel_vary_chpid_off(struct device *dev, void *data) return 0; } -static int -s390_subchannel_vary_chpid_on(struct device *dev, void *data) +static int s390_subchannel_vary_chpid_on(struct device *dev, void *data) { struct subchannel *sch; - __u8 *chpid; + struct chp_id *chpid; sch = to_subchannel(dev); chpid = data; @@ -821,40 +654,17 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data) /* We're through */ return -ENXIO; /* Put it on the slow path. 
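The vary path in __s390_subchannel_vary_chpid() above is mask arithmetic on the per-subchannel path masks: each of the eight path slots maps to one bit, MSB first, set in both the operational mask (opm) and logical mask (lpm) when varied on and cleared when varied off. The arithmetic in isolation, as a standalone check rather than patch code:

    #include <assert.h>

    int main(void)
    {
        unsigned char opm = 0, lpm = 0;
        int chp = 2;                /* third of eight path slots */
        unsigned char mask = 0x80 >> chp;

        opm |= mask; lpm |= mask;   /* vary on */
        assert(opm == 0x20 && lpm == 0x20);
        opm &= ~mask; lpm &= ~mask; /* vary off */
        assert(opm == 0 && lpm == 0);
        return 0;
    }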
*/ - if (css_enqueue_subchannel_slow(schid)) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - return -EAGAIN; - } + css_schedule_eval(schid); return 0; } -/* - * Function: s390_vary_chpid - * Varies the specified chpid online or offline +/** + * chsc_chp_vary - propagate channel-path vary operation to subchannels + * @chpid: channl-path ID + * @on: non-zero for vary online, zero for vary offline */ -static int -s390_vary_chpid( __u8 chpid, int on) +int chsc_chp_vary(struct chp_id chpid, int on) { - char dbf_text[15]; - int status; - - sprintf(dbf_text, on?"varyon%x":"varyoff%x", chpid); - CIO_TRACE_EVENT( 2, dbf_text); - - status = get_chp_status(chpid); - if (status < 0) { - printk(KERN_ERR "Can't vary unknown chpid %02X\n", chpid); - return -EINVAL; - } - - if (!on && !status) { - printk(KERN_ERR "chpid %x is already offline\n", chpid); - return -EINVAL; - } - - set_chp_logically_online(chpid, on); - /* * Redo PathVerification on the devices the chpid connects to */ @@ -865,118 +675,9 @@ s390_vary_chpid( __u8 chpid, int on) if (on) /* Scan for new devices on varied on path. */ for_each_subchannel(__s390_vary_chpid_on, NULL); - if (need_rescan || css_slow_subchannels_exist()) - queue_work(slow_path_wq, &slow_path_work); return 0; } -/* - * Channel measurement related functions - */ -static ssize_t -chp_measurement_chars_read(struct kobject *kobj, char *buf, loff_t off, - size_t count) -{ - struct channel_path *chp; - unsigned int size; - - chp = to_channelpath(container_of(kobj, struct device, kobj)); - if (!chp->cmg_chars) - return 0; - - size = sizeof(struct cmg_chars); - - if (off > size) - return 0; - if (off + count > size) - count = size - off; - memcpy(buf, chp->cmg_chars + off, count); - return count; -} - -static struct bin_attribute chp_measurement_chars_attr = { - .attr = { - .name = "measurement_chars", - .mode = S_IRUSR, - .owner = THIS_MODULE, - }, - .size = sizeof(struct cmg_chars), - .read = chp_measurement_chars_read, -}; - -static void -chp_measurement_copy_block(struct cmg_entry *buf, - struct channel_subsystem *css, int chpid) -{ - void *area; - struct cmg_entry *entry, reference_buf; - int idx; - - if (chpid < 128) { - area = css->cub_addr1; - idx = chpid; - } else { - area = css->cub_addr2; - idx = chpid - 128; - } - entry = area + (idx * sizeof(struct cmg_entry)); - do { - memcpy(buf, entry, sizeof(*entry)); - memcpy(&reference_buf, entry, sizeof(*entry)); - } while (reference_buf.values[0] != buf->values[0]); -} - -static ssize_t -chp_measurement_read(struct kobject *kobj, char *buf, loff_t off, size_t count) -{ - struct channel_path *chp; - struct channel_subsystem *css; - unsigned int size; - - chp = to_channelpath(container_of(kobj, struct device, kobj)); - css = to_css(chp->dev.parent); - - size = sizeof(struct cmg_entry); - - /* Only allow single reads. 
*/ - if (off || count < size) - return 0; - chp_measurement_copy_block((struct cmg_entry *)buf, css, chp->id); - count = size; - return count; -} - -static struct bin_attribute chp_measurement_attr = { - .attr = { - .name = "measurement", - .mode = S_IRUSR, - .owner = THIS_MODULE, - }, - .size = sizeof(struct cmg_entry), - .read = chp_measurement_read, -}; - -static void -chsc_remove_chp_cmg_attr(struct channel_path *chp) -{ - device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr); - device_remove_bin_file(&chp->dev, &chp_measurement_attr); -} - -static int -chsc_add_chp_cmg_attr(struct channel_path *chp) -{ - int ret; - - ret = device_create_bin_file(&chp->dev, &chp_measurement_chars_attr); - if (ret) - return ret; - ret = device_create_bin_file(&chp->dev, &chp_measurement_attr); - if (ret) - device_remove_bin_file(&chp->dev, &chp_measurement_chars_attr); - return ret; -} - static void chsc_remove_cmg_attr(struct channel_subsystem *css) { @@ -985,7 +686,7 @@ chsc_remove_cmg_attr(struct channel_subsystem *css) for (i = 0; i <= __MAX_CHPID; i++) { if (!css->chps[i]) continue; - chsc_remove_chp_cmg_attr(css->chps[i]); + chp_remove_cmg_attr(css->chps[i]); } } @@ -998,7 +699,7 @@ chsc_add_cmg_attr(struct channel_subsystem *css) for (i = 0; i <= __MAX_CHPID; i++) { if (!css->chps[i]) continue; - ret = chsc_add_chp_cmg_attr(css->chps[i]); + ret = chp_add_cmg_attr(css->chps[i]); if (ret) goto cleanup; } @@ -1007,12 +708,11 @@ cleanup: for (--i; i >= 0; i--) { if (!css->chps[i]) continue; - chsc_remove_chp_cmg_attr(css->chps[i]); + chp_remove_cmg_attr(css->chps[i]); } return ret; } - static int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page) { @@ -1118,7 +818,7 @@ chsc_secm(struct channel_subsystem *css, int enable) } else chsc_remove_cmg_attr(css); } - if (enable && !css->cm_enabled) { + if (!css->cm_enabled) { free_page((unsigned long)css->cub_addr1); free_page((unsigned long)css->cub_addr2); } @@ -1127,109 +827,8 @@ chsc_secm(struct channel_subsystem *css, int enable) return ret; } -/* - * Files for the channel path entries. - */ -static ssize_t -chp_status_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct channel_path *chp = container_of(dev, struct channel_path, dev); - - if (!chp) - return 0; - return (get_chp_status(chp->id) ? sprintf(buf, "online\n") : - sprintf(buf, "offline\n")); -} - -static ssize_t -chp_status_write(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct channel_path *cp = container_of(dev, struct channel_path, dev); - char cmd[10]; - int num_args; - int error; - - num_args = sscanf(buf, "%5s", cmd); - if (!num_args) - return count; - - if (!strnicmp(cmd, "on", 2)) - error = s390_vary_chpid(cp->id, 1); - else if (!strnicmp(cmd, "off", 3)) - error = s390_vary_chpid(cp->id, 0); - else - error = -EINVAL; - - return error < 0 ? 
error : count; - -} - -static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write); - -static ssize_t -chp_type_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct channel_path *chp = container_of(dev, struct channel_path, dev); - - if (!chp) - return 0; - return sprintf(buf, "%x\n", chp->desc.desc); -} - -static DEVICE_ATTR(type, 0444, chp_type_show, NULL); - -static ssize_t -chp_cmg_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct channel_path *chp = to_channelpath(dev); - - if (!chp) - return 0; - if (chp->cmg == -1) /* channel measurements not available */ - return sprintf(buf, "unknown\n"); - return sprintf(buf, "%x\n", chp->cmg); -} - -static DEVICE_ATTR(cmg, 0444, chp_cmg_show, NULL); - -static ssize_t -chp_shared_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct channel_path *chp = to_channelpath(dev); - - if (!chp) - return 0; - if (chp->shared == -1) /* channel measurements not available */ - return sprintf(buf, "unknown\n"); - return sprintf(buf, "%x\n", chp->shared); -} - -static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL); - -static struct attribute * chp_attrs[] = { - &dev_attr_status.attr, - &dev_attr_type.attr, - &dev_attr_cmg.attr, - &dev_attr_shared.attr, - NULL, -}; - -static struct attribute_group chp_attr_group = { - .attrs = chp_attrs, -}; - -static void -chp_release(struct device *dev) -{ - struct channel_path *cp; - - cp = container_of(dev, struct channel_path, dev); - kfree(cp); -} - -static int -chsc_determine_channel_path_description(int chpid, - struct channel_path_desc *desc) +int chsc_determine_channel_path_description(struct chp_id chpid, + struct channel_path_desc *desc) { int ccode, ret; @@ -1252,8 +851,8 @@ chsc_determine_channel_path_description(int chpid, scpd_area->request.length = 0x0010; scpd_area->request.code = 0x0002; - scpd_area->first_chpid = chpid; - scpd_area->last_chpid = chpid; + scpd_area->first_chpid = chpid.id; + scpd_area->last_chpid = chpid.id; ccode = chsc(scpd_area); if (ccode > 0) { @@ -1316,8 +915,7 @@ chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv, } } -static int -chsc_get_channel_measurement_chars(struct channel_path *chp) +int chsc_get_channel_measurement_chars(struct channel_path *chp) { int ccode, ret; @@ -1349,8 +947,8 @@ chsc_get_channel_measurement_chars(struct channel_path *chp) scmc_area->request.length = 0x0010; scmc_area->request.code = 0x0022; - scmc_area->first_chpid = chp->id; - scmc_area->last_chpid = chp->id; + scmc_area->first_chpid = chp->chpid.id; + scmc_area->last_chpid = chp->chpid.id; ccode = chsc(scmc_area); if (ccode > 0) { @@ -1392,94 +990,6 @@ out: return ret; } -/* - * Entries for chpids on the system bus. - * This replaces /proc/chpids. - */ -static int -new_channel_path(int chpid) -{ - struct channel_path *chp; - int ret; - - chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); - if (!chp) - return -ENOMEM; - - /* fill in status, etc. */ - chp->id = chpid; - chp->state = 1; - chp->dev.parent = &css[0]->device; - chp->dev.release = chp_release; - snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp0.%x", chpid); - - /* Obtain channel path description and fill it in. */ - ret = chsc_determine_channel_path_description(chpid, &chp->desc); - if (ret) - goto out_free; - /* Get channel-measurement characteristics. 
*/ - if (css_characteristics_avail && css_chsc_characteristics.scmc - && css_chsc_characteristics.secm) { - ret = chsc_get_channel_measurement_chars(chp); - if (ret) - goto out_free; - } else { - static int msg_done; - - if (!msg_done) { - printk(KERN_WARNING "cio: Channel measurements not " - "available, continuing.\n"); - msg_done = 1; - } - chp->cmg = -1; - } - - /* make it known to the system */ - ret = device_register(&chp->dev); - if (ret) { - printk(KERN_WARNING "%s: could not register %02x\n", - __func__, chpid); - goto out_free; - } - ret = sysfs_create_group(&chp->dev.kobj, &chp_attr_group); - if (ret) { - device_unregister(&chp->dev); - goto out_free; - } - mutex_lock(&css[0]->mutex); - if (css[0]->cm_enabled) { - ret = chsc_add_chp_cmg_attr(chp); - if (ret) { - sysfs_remove_group(&chp->dev.kobj, &chp_attr_group); - device_unregister(&chp->dev); - mutex_unlock(&css[0]->mutex); - goto out_free; - } - } - css[0]->chps[chpid] = chp; - mutex_unlock(&css[0]->mutex); - return ret; -out_free: - kfree(chp); - return ret; -} - -void * -chsc_get_chp_desc(struct subchannel *sch, int chp_no) -{ - struct channel_path *chp; - struct channel_path_desc *desc; - - chp = css[0]->chps[sch->schib.pmcw.chpid[chp_no]]; - if (!chp) - return NULL; - desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL); - if (!desc) - return NULL; - memcpy(desc, &chp->desc, sizeof(struct channel_path_desc)); - return desc; -} - static int __init chsc_alloc_sei_area(void) { diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 0fb2b024208..2ad81d11cf7 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -1,9 +1,10 @@ #ifndef S390_CHSC_H #define S390_CHSC_H -#define CHSC_SEI_ACC_CHPID 1 -#define CHSC_SEI_ACC_LINKADDR 2 -#define CHSC_SEI_ACC_FULLLINKADDR 3 +#include <linux/types.h> +#include <linux/device.h> +#include <asm/chpid.h> +#include "schid.h" #define CHSC_SDA_OC_MSS 0x2 @@ -33,23 +34,9 @@ struct channel_path_desc { u8 chpp; } __attribute__ ((packed)); -struct channel_path { - int id; - int state; - struct channel_path_desc desc; - /* Channel-measurement related stuff: */ - int cmg; - int shared; - void *cmg_chars; - struct device dev; -}; +struct channel_path; -extern void s390_process_css( void ); -extern void chsc_validate_chpids(struct subchannel *); -extern void chpid_is_actually_online(int); -extern int css_get_ssd_info(struct subchannel *); -extern int chsc_process_crw(void); -extern int chp_process_crw(int, int); +extern void chsc_process_crw(void); struct css_general_char { u64 : 41; @@ -82,15 +69,26 @@ struct css_chsc_char { extern struct css_general_char css_general_characteristics; extern struct css_chsc_char css_chsc_characteristics; +struct chsc_ssd_info { + u8 path_mask; + u8 fla_valid_mask; + struct chp_id chpid[8]; + u16 fla[8]; +}; +extern int chsc_get_ssd_info(struct subchannel_id schid, + struct chsc_ssd_info *ssd); extern int chsc_determine_css_characteristics(void); extern int css_characteristics_avail; -extern void *chsc_get_chp_desc(struct subchannel*, int); - extern int chsc_enable_facility(int); struct channel_subsystem; extern int chsc_secm(struct channel_subsystem *, int); -#define to_channelpath(device) container_of(device, struct channel_path, dev) +int chsc_chp_vary(struct chp_id chpid, int on); +int chsc_determine_channel_path_description(struct chp_id chpid, + struct channel_path_desc *desc); +void chsc_chp_online(struct chp_id chpid); +void chsc_chp_offline(struct chp_id chpid); +int chsc_get_channel_measurement_chars(struct channel_path 
*chp); #endif diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 9cb129ab5be..ea1defba569 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -22,6 +22,7 @@ #include <asm/setup.h> #include <asm/reset.h> #include <asm/ipl.h> +#include <asm/chpid.h> #include "airq.h" #include "cio.h" #include "css.h" @@ -29,6 +30,7 @@ #include "ioasm.h" #include "blacklist.h" #include "cio_debug.h" +#include "chp.h" #include "../s390mach.h" debug_info_t *cio_debug_msg_id; @@ -592,9 +594,10 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid) err = -ENODEV; goto out; } - sch->opm = 0xff; - if (!cio_is_console(sch->schid)) - chsc_validate_chpids(sch); + if (cio_is_console(sch->schid)) + sch->opm = 0xff; + else + sch->opm = chp_get_sch_opm(sch); sch->lpm = sch->schib.pmcw.pam & sch->opm; CIO_DEBUG(KERN_INFO, 0, @@ -954,6 +957,7 @@ static void css_reset(void) { int i, ret; unsigned long long timeout; + struct chp_id chpid; /* Reset subchannels. */ for_each_subchannel(__shutdown_subchannel_easy, NULL); @@ -963,8 +967,10 @@ static void css_reset(void) __ctl_set_bit(14, 28); /* Temporarily reenable machine checks. */ local_mcck_enable(); + chp_id_init(&chpid); for (i = 0; i <= __MAX_CHPID; i++) { - ret = rchp(i); + chpid.id = i; + ret = rchp(chpid); if ((ret == 0) || (ret == 2)) /* * rchp either succeeded, or another rchp is already @@ -1048,37 +1054,19 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) do_reipl_asm(*((__u32*)&schid)); } -static struct schib __initdata ipl_schib; - -/* - * ipl_save_parameters gets called very early. It is not allowed to access - * anything in the bss section at all. The bss section is not cleared yet, - * but may contain some ipl parameters written by the firmware. - * These parameters (if present) are copied to 0x2000. - * To avoid corruption of the ipl parameters, all variables used by this - * function must reside on the stack or in the data section. 
- */ -void ipl_save_parameters(void) +int __init cio_get_iplinfo(struct cio_iplinfo *iplinfo) { struct subchannel_id schid; - unsigned int *ipl_ptr; - void *src, *dst; + struct schib schib; schid = *(struct subchannel_id *)__LC_SUBCHANNEL_ID; if (!schid.one) - return; - if (stsch(schid, &ipl_schib)) - return; - if (!ipl_schib.pmcw.dnv) - return; - ipl_devno = ipl_schib.pmcw.dev; - ipl_flags |= IPL_DEVNO_VALID; - if (!ipl_schib.pmcw.qf) - return; - ipl_flags |= IPL_PARMBLOCK_VALID; - ipl_ptr = (unsigned int *)__LC_IPL_PARMBLOCK_PTR; - src = (void *)(unsigned long)*ipl_ptr; - dst = (void *)IPL_PARMBLOCK_ORIGIN; - memmove(dst, src, PAGE_SIZE); - *ipl_ptr = IPL_PARMBLOCK_ORIGIN; + return -ENODEV; + if (stsch(schid, &schib)) + return -ENODEV; + if (!schib.pmcw.dnv) + return -ENODEV; + iplinfo->devno = schib.pmcw.dev; + iplinfo->is_qdio = schib.pmcw.qf; + return 0; } diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 35154a21035..7446c39951a 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -1,18 +1,11 @@ #ifndef S390_CIO_H #define S390_CIO_H -#include "schid.h" #include <linux/mutex.h> - -/* - * where we put the ssd info - */ -struct ssd_info { - __u8 valid:1; - __u8 type:7; /* subchannel type */ - __u8 chpid[8]; /* chpids */ - __u16 fla[8]; /* full link addresses */ -} __attribute__ ((packed)); +#include <linux/device.h> +#include <asm/chpid.h> +#include "chsc.h" +#include "schid.h" /* * path management control word @@ -108,7 +101,7 @@ struct subchannel { struct schib schib; /* subchannel information block */ struct orb orb; /* operation request block */ struct ccw1 sense_ccw; /* static ccw for sense command */ - struct ssd_info ssd_info; /* subchannel description */ + struct chsc_ssd_info ssd_info; /* subchannel description */ struct device dev; /* entry in device tree */ struct css_driver *driver; } __attribute__ ((aligned(8))); diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 90b22faabbf..28abd697be1 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -476,7 +476,7 @@ struct cmb_area { }; static struct cmb_area cmb_area = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(cmb_area.lock), .list = LIST_HEAD_INIT(cmb_area.list), .num_channels = 1024, }; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index fe0ace7aece..27c6d9e55b2 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -20,8 +20,9 @@ #include "ioasm.h" #include "chsc.h" #include "device.h" +#include "idset.h" +#include "chp.h" -int need_rescan = 0; int css_init_done = 0; static int need_reprobe = 0; static int max_ssid = 0; @@ -125,8 +126,52 @@ void css_sch_device_unregister(struct subchannel *sch) mutex_unlock(&sch->reg_mutex); } -static int -css_register_subchannel(struct subchannel *sch) +static void ssd_from_pmcw(struct chsc_ssd_info *ssd, struct pmcw *pmcw) +{ + int i; + int mask; + + memset(ssd, 0, sizeof(struct chsc_ssd_info)); + ssd->path_mask = pmcw->pim; + for (i = 0; i < 8; i++) { + mask = 0x80 >> i; + if (pmcw->pim & mask) { + chp_id_init(&ssd->chpid[i]); + ssd->chpid[i].id = pmcw->chpid[i]; + } + } +} + +static void ssd_register_chpids(struct chsc_ssd_info *ssd) +{ + int i; + int mask; + + for (i = 0; i < 8; i++) { + mask = 0x80 >> i; + if (ssd->path_mask & mask) + if (!chp_is_registered(ssd->chpid[i])) + chp_new(ssd->chpid[i]); + } +} + +void css_update_ssd_info(struct subchannel *sch) +{ + int ret; + + if (cio_is_console(sch->schid)) { + /* Console is initialized too early for functions requiring + * memory 
allocation. */ + ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw); + } else { + ret = chsc_get_ssd_info(sch->schid, &sch->ssd_info); + if (ret) + ssd_from_pmcw(&sch->ssd_info, &sch->schib.pmcw); + ssd_register_chpids(&sch->ssd_info); + } +} + +static int css_register_subchannel(struct subchannel *sch) { int ret; @@ -135,9 +180,7 @@ css_register_subchannel(struct subchannel *sch) sch->dev.bus = &css_bus_type; sch->dev.release = &css_subchannel_release; sch->dev.groups = subch_attr_groups; - - css_get_ssd_info(sch); - + css_update_ssd_info(sch); /* make it known to the system */ ret = css_sch_device_register(sch); if (ret) { @@ -306,7 +349,7 @@ static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow) return css_probe_device(schid); } -static int css_evaluate_subchannel(struct subchannel_id schid, int slow) +static void css_evaluate_subchannel(struct subchannel_id schid, int slow) { struct subchannel *sch; int ret; @@ -317,53 +360,66 @@ static int css_evaluate_subchannel(struct subchannel_id schid, int slow) put_device(&sch->dev); } else ret = css_evaluate_new_subchannel(schid, slow); - - return ret; + if (ret == -EAGAIN) + css_schedule_eval(schid); } -static int -css_rescan_devices(struct subchannel_id schid, void *data) +static struct idset *slow_subchannel_set; +static spinlock_t slow_subchannel_lock; + +static int __init slow_subchannel_init(void) { - return css_evaluate_subchannel(schid, 1); + spin_lock_init(&slow_subchannel_lock); + slow_subchannel_set = idset_sch_new(); + if (!slow_subchannel_set) { + printk(KERN_WARNING "cio: could not allocate slow subchannel " + "set\n"); + return -ENOMEM; + } + return 0; } -struct slow_subchannel { - struct list_head slow_list; - struct subchannel_id schid; -}; - -static LIST_HEAD(slow_subchannels_head); -static DEFINE_SPINLOCK(slow_subchannel_lock); +subsys_initcall(slow_subchannel_init); -static void -css_trigger_slow_path(struct work_struct *unused) +static void css_slow_path_func(struct work_struct *unused) { - CIO_TRACE_EVENT(4, "slowpath"); - - if (need_rescan) { - need_rescan = 0; - for_each_subchannel(css_rescan_devices, NULL); - return; - } + struct subchannel_id schid; + CIO_TRACE_EVENT(4, "slowpath"); spin_lock_irq(&slow_subchannel_lock); - while (!list_empty(&slow_subchannels_head)) { - struct slow_subchannel *slow_sch = - list_entry(slow_subchannels_head.next, - struct slow_subchannel, slow_list); - - list_del_init(slow_subchannels_head.next); + init_subchannel_id(&schid); + while (idset_sch_get_first(slow_subchannel_set, &schid)) { + idset_sch_del(slow_subchannel_set, schid); spin_unlock_irq(&slow_subchannel_lock); - css_evaluate_subchannel(slow_sch->schid, 1); + css_evaluate_subchannel(schid, 1); spin_lock_irq(&slow_subchannel_lock); - kfree(slow_sch); } spin_unlock_irq(&slow_subchannel_lock); } -DECLARE_WORK(slow_path_work, css_trigger_slow_path); +static DECLARE_WORK(slow_path_work, css_slow_path_func); struct workqueue_struct *slow_path_wq; +void css_schedule_eval(struct subchannel_id schid) +{ + unsigned long flags; + + spin_lock_irqsave(&slow_subchannel_lock, flags); + idset_sch_add(slow_subchannel_set, schid); + queue_work(slow_path_wq, &slow_path_work); + spin_unlock_irqrestore(&slow_subchannel_lock, flags); +} + +void css_schedule_eval_all(void) +{ + unsigned long flags; + + spin_lock_irqsave(&slow_subchannel_lock, flags); + idset_fill(slow_subchannel_set); + queue_work(slow_path_wq, &slow_path_work); + spin_unlock_irqrestore(&slow_subchannel_lock, flags); +} + /* Reprobe subchannel if unregistered. 
*/ static int reprobe_subchannel(struct subchannel_id schid, void *data) { @@ -426,33 +482,14 @@ void css_schedule_reprobe(void) EXPORT_SYMBOL_GPL(css_schedule_reprobe); /* - * Rescan for new devices. FIXME: This is slow. - * This function is called when we have lost CRWs due to overflows and we have - * to do subchannel housekeeping. - */ -void -css_reiterate_subchannels(void) -{ - css_clear_subchannel_slow_list(); - need_rescan = 1; -} - -/* * Called from the machine check handler for subchannel report words. */ -int -css_process_crw(int rsid1, int rsid2) +void css_process_crw(int rsid1, int rsid2) { - int ret; struct subchannel_id mchk_schid; CIO_CRW_EVENT(2, "source is subchannel %04X, subsystem id %x\n", rsid1, rsid2); - - if (need_rescan) - /* We need to iterate all subchannels anyway. */ - return -EAGAIN; - init_subchannel_id(&mchk_schid); mchk_schid.sch_no = rsid1; if (rsid2 != 0) @@ -463,14 +500,7 @@ css_process_crw(int rsid1, int rsid2) * use stsch() to find out if the subchannel in question has come * or gone. */ - ret = css_evaluate_subchannel(mchk_schid, 0); - if (ret == -EAGAIN) { - if (css_enqueue_subchannel_slow(mchk_schid)) { - css_clear_subchannel_slow_list(); - need_rescan = 1; - } - } - return ret; + css_evaluate_subchannel(mchk_schid, 0); } static int __init @@ -745,47 +775,6 @@ struct bus_type css_bus_type = { subsys_initcall(init_channel_subsystem); -int -css_enqueue_subchannel_slow(struct subchannel_id schid) -{ - struct slow_subchannel *new_slow_sch; - unsigned long flags; - - new_slow_sch = kzalloc(sizeof(struct slow_subchannel), GFP_ATOMIC); - if (!new_slow_sch) - return -ENOMEM; - new_slow_sch->schid = schid; - spin_lock_irqsave(&slow_subchannel_lock, flags); - list_add_tail(&new_slow_sch->slow_list, &slow_subchannels_head); - spin_unlock_irqrestore(&slow_subchannel_lock, flags); - return 0; -} - -void -css_clear_subchannel_slow_list(void) -{ - unsigned long flags; - - spin_lock_irqsave(&slow_subchannel_lock, flags); - while (!list_empty(&slow_subchannels_head)) { - struct slow_subchannel *slow_sch = - list_entry(slow_subchannels_head.next, - struct slow_subchannel, slow_list); - - list_del_init(slow_subchannels_head.next); - kfree(slow_sch); - } - spin_unlock_irqrestore(&slow_subchannel_lock, flags); -} - - - -int -css_slow_subchannels_exist(void) -{ - return (!list_empty(&slow_subchannels_head)); -} - MODULE_LICENSE("GPL"); EXPORT_SYMBOL(css_bus_type); EXPORT_SYMBOL_GPL(css_characteristics_avail); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index ca2bab932a8..71fcfdc4280 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -4,8 +4,11 @@ #include <linux/mutex.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <linux/device.h> +#include <linux/types.h> #include <asm/cio.h> +#include <asm/chpid.h> #include "schid.h" @@ -143,13 +146,12 @@ extern void css_sch_device_unregister(struct subchannel *); extern struct subchannel * get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *); -extern int css_process_crw(int, int); +extern void css_process_crw(int, int); extern void css_reiterate_subchannels(void); +void css_update_ssd_info(struct subchannel *sch); #define __MAX_SUBCHANNEL 65535 #define __MAX_SSID 3 -#define __MAX_CHPID 255 -#define __MAX_CSSID 0 struct channel_subsystem { u8 cssid; @@ -185,16 +187,12 @@ int device_trigger_verify(struct subchannel *sch); void device_kill_pending_timer(struct subchannel *); /* Helper 
functions to build lists for the slow path. */ -extern int css_enqueue_subchannel_slow(struct subchannel_id schid); -void css_walk_subchannel_slow_list(void (*fn)(unsigned long)); -void css_clear_subchannel_slow_list(void); -int css_slow_subchannels_exist(void); -extern int need_rescan; +void css_schedule_eval(struct subchannel_id schid); +void css_schedule_eval_all(void); int sch_is_pseudo_sch(struct subchannel *); extern struct workqueue_struct *slow_path_wq; -extern struct work_struct slow_path_work; int subchannel_add_files (struct device *); extern struct attribute_group *subch_attr_groups[]; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e322111fb36..03355902c58 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -56,13 +56,12 @@ ccw_bus_match (struct device * dev, struct device_driver * drv) /* Store modalias string delimited by prefix/suffix string into buffer with * specified size. Return length of resulting string (excluding trailing '\0') * even if string doesn't fit buffer (snprintf semantics). */ -static int snprint_alias(char *buf, size_t size, const char *prefix, +static int snprint_alias(char *buf, size_t size, struct ccw_device_id *id, const char *suffix) { int len; - len = snprintf(buf, size, "%sccw:t%04Xm%02X", prefix, id->cu_type, - id->cu_model); + len = snprintf(buf, size, "ccw:t%04Xm%02X", id->cu_type, id->cu_model); if (len > size) return len; buf += len; @@ -85,53 +84,40 @@ static int ccw_uevent(struct device *dev, char **envp, int num_envp, struct ccw_device *cdev = to_ccwdev(dev); struct ccw_device_id *id = &(cdev->id); int i = 0; - int len; + int len = 0; + int ret; + char modalias_buf[30]; /* CU_TYPE= */ - len = snprintf(buffer, buffer_size, "CU_TYPE=%04X", id->cu_type) + 1; - if (len > buffer_size || i >= num_envp) - return -ENOMEM; - envp[i++] = buffer; - buffer += len; - buffer_size -= len; + ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "CU_TYPE=%04X", id->cu_type); + if (ret) + return ret; /* CU_MODEL= */ - len = snprintf(buffer, buffer_size, "CU_MODEL=%02X", id->cu_model) + 1; - if (len > buffer_size || i >= num_envp) - return -ENOMEM; - envp[i++] = buffer; - buffer += len; - buffer_size -= len; + ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "CU_MODEL=%02X", id->cu_model); + if (ret) + return ret; /* The next two can be zero, that's ok for us */ /* DEV_TYPE= */ - len = snprintf(buffer, buffer_size, "DEV_TYPE=%04X", id->dev_type) + 1; - if (len > buffer_size || i >= num_envp) - return -ENOMEM; - envp[i++] = buffer; - buffer += len; - buffer_size -= len; + ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "DEV_TYPE=%04X", id->dev_type); + if (ret) + return ret; /* DEV_MODEL= */ - len = snprintf(buffer, buffer_size, "DEV_MODEL=%02X", - (unsigned char) id->dev_model) + 1; - if (len > buffer_size || i >= num_envp) - return -ENOMEM; - envp[i++] = buffer; - buffer += len; - buffer_size -= len; + ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "DEV_MODEL=%02X", id->dev_model); + if (ret) + return ret; /* MODALIAS= */ - len = snprint_alias(buffer, buffer_size, "MODALIAS=", id, "") + 1; - if (len > buffer_size || i >= num_envp) - return -ENOMEM; - envp[i++] = buffer; - buffer += len; - buffer_size -= len; - - envp[i] = NULL; - - return 0; + snprint_alias(modalias_buf, sizeof(modalias_buf), id, ""); + ret = add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len, + "MODALIAS=%s", modalias_buf); + return ret; } struct 
bus_type ccw_bus_type; @@ -230,12 +216,18 @@ static ssize_t chpids_show (struct device * dev, struct device_attribute *attr, char * buf) { struct subchannel *sch = to_subchannel(dev); - struct ssd_info *ssd = &sch->ssd_info; + struct chsc_ssd_info *ssd = &sch->ssd_info; ssize_t ret = 0; int chp; + int mask; - for (chp = 0; chp < 8; chp++) - ret += sprintf (buf+ret, "%02x ", ssd->chpid[chp]); + for (chp = 0; chp < 8; chp++) { + mask = 0x80 >> chp; + if (ssd->path_mask & mask) + ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); + else + ret += sprintf(buf + ret, "00 "); + } ret += sprintf (buf+ret, "\n"); return min((ssize_t)PAGE_SIZE, ret); } @@ -280,7 +272,7 @@ modalias_show (struct device *dev, struct device_attribute *attr, char *buf) struct ccw_device_id *id = &(cdev->id); int len; - len = snprint_alias(buf, PAGE_SIZE, "", id, "\n") + 1; + len = snprint_alias(buf, PAGE_SIZE, id, "\n") + 1; return len > PAGE_SIZE ? PAGE_SIZE : len; } @@ -298,16 +290,10 @@ int ccw_device_is_orphan(struct ccw_device *cdev) return sch_is_pseudo_sch(to_subchannel(cdev->dev.parent)); } -static void ccw_device_unregister(struct work_struct *work) +static void ccw_device_unregister(struct ccw_device *cdev) { - struct ccw_device_private *priv; - struct ccw_device *cdev; - - priv = container_of(work, struct ccw_device_private, kick_work); - cdev = priv->cdev; if (test_and_clear_bit(1, &cdev->private->registered)) - device_unregister(&cdev->dev); - put_device(&cdev->dev); + device_del(&cdev->dev); } static void @@ -324,11 +310,8 @@ ccw_device_remove_disconnected(struct ccw_device *cdev) spin_lock_irqsave(cdev->ccwlock, flags); cdev->private->state = DEV_STATE_NOT_OPER; spin_unlock_irqrestore(cdev->ccwlock, flags); - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_unregister); - queue_work(ccw_device_work, &cdev->private->kick_work); - } + ccw_device_unregister(cdev); + put_device(&cdev->dev); return ; } sch = to_subchannel(cdev->dev.parent); @@ -413,11 +396,60 @@ ccw_device_set_online(struct ccw_device *cdev) return (ret == 0) ? -ENODEV : ret; } -static ssize_t -online_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +static void online_store_handle_offline(struct ccw_device *cdev) +{ + if (cdev->private->state == DEV_STATE_DISCONNECTED) + ccw_device_remove_disconnected(cdev); + else if (cdev->drv && cdev->drv->set_offline) + ccw_device_set_offline(cdev); +} + +static int online_store_recog_and_online(struct ccw_device *cdev) +{ + int ret; + + /* Do device recognition, if needed. 
*/ + if (cdev->id.cu_type == 0) { + ret = ccw_device_recognition(cdev); + if (ret) { + printk(KERN_WARNING"Couldn't start recognition " + "for device %s (ret=%d)\n", + cdev->dev.bus_id, ret); + return ret; + } + wait_event(cdev->private->wait_q, + cdev->private->flags.recog_done); + } + if (cdev->drv && cdev->drv->set_online) + ccw_device_set_online(cdev); + return 0; +} +static void online_store_handle_online(struct ccw_device *cdev, int force) +{ + int ret; + + ret = online_store_recog_and_online(cdev); + if (ret) + return; + if (force && cdev->private->state == DEV_STATE_BOXED) { + ret = ccw_device_stlck(cdev); + if (ret) { + printk(KERN_WARNING"ccw_device_stlck for device %s " + "returned %d!\n", cdev->dev.bus_id, ret); + return; + } + if (cdev->id.cu_type == 0) + cdev->private->state = DEV_STATE_NOT_OPER; + online_store_recog_and_online(cdev); + } + +} + +static ssize_t online_store (struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct ccw_device *cdev = to_ccwdev(dev); - int i, force, ret; + int i, force; char *tmp; if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0) @@ -434,51 +466,17 @@ online_store (struct device *dev, struct device_attribute *attr, const char *buf force = 0; i = simple_strtoul(buf, &tmp, 16); } - if (i == 1) { - /* Do device recognition, if needed. */ - if (cdev->id.cu_type == 0) { - ret = ccw_device_recognition(cdev); - if (ret) { - printk(KERN_WARNING"Couldn't start recognition " - "for device %s (ret=%d)\n", - cdev->dev.bus_id, ret); - goto out; - } - wait_event(cdev->private->wait_q, - cdev->private->flags.recog_done); - } - if (cdev->drv && cdev->drv->set_online) - ccw_device_set_online(cdev); - } else if (i == 0) { - if (cdev->private->state == DEV_STATE_DISCONNECTED) - ccw_device_remove_disconnected(cdev); - else if (cdev->drv && cdev->drv->set_offline) - ccw_device_set_offline(cdev); - } - if (force && cdev->private->state == DEV_STATE_BOXED) { - ret = ccw_device_stlck(cdev); - if (ret) { - printk(KERN_WARNING"ccw_device_stlck for device %s " - "returned %d!\n", cdev->dev.bus_id, ret); - goto out; - } - /* Do device recognition, if needed. 
*/ - if (cdev->id.cu_type == 0) { - cdev->private->state = DEV_STATE_NOT_OPER; - ret = ccw_device_recognition(cdev); - if (ret) { - printk(KERN_WARNING"Couldn't start recognition " - "for device %s (ret=%d)\n", - cdev->dev.bus_id, ret); - goto out; - } - wait_event(cdev->private->wait_q, - cdev->private->flags.recog_done); - } - if (cdev->drv && cdev->drv->set_online) - ccw_device_set_online(cdev); + + switch (i) { + case 0: + online_store_handle_offline(cdev); + break; + case 1: + online_store_handle_online(cdev, force); + break; + default: + count = -EINVAL; } - out: if (cdev->drv) module_put(cdev->drv->owner); atomic_set(&cdev->private->onoff, 0); @@ -548,17 +546,10 @@ static struct attribute_group ccwdev_attr_group = { .attrs = ccwdev_attrs, }; -static int -device_add_files (struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &ccwdev_attr_group); -} - -static void -device_remove_files(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &ccwdev_attr_group); -} +struct attribute_group *ccwdev_attr_groups[] = { + &ccwdev_attr_group, + NULL, +}; /* this is a simple abstraction for device_register that sets the * correct bus type and adds the bus specific files */ @@ -573,10 +564,6 @@ static int ccw_device_register(struct ccw_device *cdev) return ret; set_bit(1, &cdev->private->registered); - if ((ret = device_add_files(dev))) { - if (test_and_clear_bit(1, &cdev->private->registered)) - device_del(dev); - } return ret; } @@ -648,10 +635,6 @@ ccw_device_add_changed(struct work_struct *work) return; } set_bit(1, &cdev->private->registered); - if (device_add_files(&cdev->dev)) { - if (test_and_clear_bit(1, &cdev->private->registered)) - device_unregister(&cdev->dev); - } } void ccw_device_do_unreg_rereg(struct work_struct *work) @@ -664,9 +647,7 @@ void ccw_device_do_unreg_rereg(struct work_struct *work) cdev = priv->cdev; sch = to_subchannel(cdev->dev.parent); - device_remove_files(&cdev->dev); - if (test_and_clear_bit(1, &cdev->private->registered)) - device_del(&cdev->dev); + ccw_device_unregister(cdev); PREPARE_WORK(&cdev->private->kick_work, ccw_device_add_changed); queue_work(ccw_device_work, &cdev->private->kick_work); @@ -705,6 +686,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, cdev->dev.parent = &sch->dev; cdev->dev.release = ccw_device_release; INIT_LIST_HEAD(&cdev->private->kick_work.entry); + cdev->dev.groups = ccwdev_attr_groups; /* Do first half of device_register. */ device_initialize(&cdev->dev); if (!get_device(&sch->dev)) { @@ -736,6 +718,7 @@ static int io_subchannel_recog(struct ccw_device *, struct subchannel *); static void sch_attach_device(struct subchannel *sch, struct ccw_device *cdev) { + css_update_ssd_info(sch); spin_lock_irq(sch->lock); sch->dev.driver_data = cdev; cdev->private->schid = sch->schid; @@ -871,7 +854,7 @@ io_subchannel_register(struct work_struct *work) priv = container_of(work, struct ccw_device_private, kick_work); cdev = priv->cdev; sch = to_subchannel(cdev->dev.parent); - + css_update_ssd_info(sch); /* * io_subchannel_register() will also be called after device * recognition has been done for a boxed device (which will already @@ -1133,15 +1116,8 @@ io_subchannel_remove (struct subchannel *sch) sch->dev.driver_data = NULL; cdev->private->state = DEV_STATE_NOT_OPER; spin_unlock_irqrestore(cdev->ccwlock, flags); - /* - * Put unregistration on workqueue to avoid livelocks on the css bus - * semaphore. 
- */ - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_unregister); - queue_work(ccw_device_work, &cdev->private->kick_work); - } + ccw_device_unregister(cdev); + put_device(&cdev->dev); return 0; } diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 089a3ddd626..898ec3b2beb 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -15,6 +15,7 @@ #include <asm/ccwdev.h> #include <asm/cio.h> +#include <asm/chpid.h> #include "cio.h" #include "cio_debug.h" @@ -22,6 +23,7 @@ #include "device.h" #include "chsc.h" #include "ioasm.h" +#include "chp.h" int device_is_online(struct subchannel *sch) @@ -210,14 +212,18 @@ static void __recover_lost_chpids(struct subchannel *sch, int old_lpm) { int mask, i; + struct chp_id chpid; + chp_id_init(&chpid); for (i = 0; i<8; i++) { mask = 0x80 >> i; if (!(sch->lpm & mask)) continue; if (old_lpm & mask) continue; - chpid_is_actually_online(sch->schib.pmcw.chpid[i]); + chpid.id = sch->schib.pmcw.chpid[i]; + if (!chp_is_registered(chpid)) + css_schedule_eval_all(); } } diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 7c7775aae38..16f59fcb66b 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -16,12 +16,14 @@ #include <asm/ccwdev.h> #include <asm/idals.h> +#include <asm/chpid.h> #include "cio.h" #include "cio_debug.h" #include "css.h" #include "chsc.h" #include "device.h" +#include "chp.h" int ccw_device_set_options_mask(struct ccw_device *cdev, unsigned long flags) { @@ -606,9 +608,12 @@ void * ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) { struct subchannel *sch; + struct chp_id chpid; sch = to_subchannel(cdev->dev.parent); - return chsc_get_chp_desc(sch, chp_no); + chp_id_init(&chpid); + chpid.id = sch->schib.pmcw.chpid[chp_no]; + return chp_get_chp_desc(chpid); } // FIXME: these have to go: diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c new file mode 100644 index 00000000000..16ea828e99f --- /dev/null +++ b/drivers/s390/cio/idset.c @@ -0,0 +1,112 @@ +/* + * drivers/s390/cio/idset.c + * + * Copyright IBM Corp. 
2007 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#include <linux/slab.h> +#include <asm/bitops.h> +#include "idset.h" +#include "css.h" + +struct idset { + int num_ssid; + int num_id; + unsigned long bitmap[0]; +}; + +static inline unsigned long bitmap_size(int num_ssid, int num_id) +{ + return __BITOPS_WORDS(num_ssid * num_id) * sizeof(unsigned long); +} + +static struct idset *idset_new(int num_ssid, int num_id) +{ + struct idset *set; + + set = kzalloc(sizeof(struct idset) + bitmap_size(num_ssid, num_id), + GFP_KERNEL); + if (set) { + set->num_ssid = num_ssid; + set->num_id = num_id; + } + return set; +} + +void idset_free(struct idset *set) +{ + kfree(set); +} + +void idset_clear(struct idset *set) +{ + memset(set->bitmap, 0, bitmap_size(set->num_ssid, set->num_id)); +} + +void idset_fill(struct idset *set) +{ + memset(set->bitmap, 0xff, bitmap_size(set->num_ssid, set->num_id)); +} + +static inline void idset_add(struct idset *set, int ssid, int id) +{ + set_bit(ssid * set->num_id + id, set->bitmap); +} + +static inline void idset_del(struct idset *set, int ssid, int id) +{ + clear_bit(ssid * set->num_id + id, set->bitmap); +} + +static inline int idset_contains(struct idset *set, int ssid, int id) +{ + return test_bit(ssid * set->num_id + id, set->bitmap); +} + +static inline int idset_get_first(struct idset *set, int *ssid, int *id) +{ + int bitnum; + + bitnum = find_first_bit(set->bitmap, set->num_ssid * set->num_id); + if (bitnum >= set->num_ssid * set->num_id) + return 0; + *ssid = bitnum / set->num_id; + *id = bitnum % set->num_id; + return 1; +} + +struct idset *idset_sch_new(void) +{ + return idset_new(__MAX_SSID + 1, __MAX_SUBCHANNEL + 1); +} + +void idset_sch_add(struct idset *set, struct subchannel_id schid) +{ + idset_add(set, schid.ssid, schid.sch_no); +} + +void idset_sch_del(struct idset *set, struct subchannel_id schid) +{ + idset_del(set, schid.ssid, schid.sch_no); +} + +int idset_sch_contains(struct idset *set, struct subchannel_id schid) +{ + return idset_contains(set, schid.ssid, schid.sch_no); +} + +int idset_sch_get_first(struct idset *set, struct subchannel_id *schid) +{ + int ssid = 0; + int id = 0; + int rc; + + rc = idset_get_first(set, &ssid, &id); + if (rc) { + init_subchannel_id(schid); + schid->ssid = ssid; + schid->sch_no = id; + } + return rc; +} diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h new file mode 100644 index 00000000000..144466ab8c1 --- /dev/null +++ b/drivers/s390/cio/idset.h @@ -0,0 +1,25 @@ +/* + * drivers/s390/cio/idset.h + * + * Copyright IBM Corp. 
2007 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef S390_IDSET_H +#define S390_IDSET_H S390_IDSET_H + +#include "schid.h" + +struct idset; + +void idset_free(struct idset *set); +void idset_clear(struct idset *set); +void idset_fill(struct idset *set); + +struct idset *idset_sch_new(void); +void idset_sch_add(struct idset *set, struct subchannel_id id); +void idset_sch_del(struct idset *set, struct subchannel_id id); +int idset_sch_contains(struct idset *set, struct subchannel_id id); +int idset_sch_get_first(struct idset *set, struct subchannel_id *id); + +#endif /* S390_IDSET_H */ diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h index ad6d8294006..7153dd95908 100644 --- a/drivers/s390/cio/ioasm.h +++ b/drivers/s390/cio/ioasm.h @@ -1,6 +1,7 @@ #ifndef S390_CIO_IOASM_H #define S390_CIO_IOASM_H +#include <asm/chpid.h> #include "schid.h" /* @@ -189,9 +190,9 @@ static inline int chsc(void *chsc_area) return cc; } -static inline int rchp(int chpid) +static inline int rchp(struct chp_id chpid) { - register unsigned int reg1 asm ("1") = chpid; + register struct chp_id reg1 asm ("1") = chpid; int ccode; asm volatile( diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c index 7809a79feec..6dd64d0c8d4 100644 --- a/drivers/s390/net/claw.c +++ b/drivers/s390/net/claw.c @@ -3525,8 +3525,8 @@ unpack_next: memcpy(skb_put(skb,len_of_data), privptr->p_mtc_envelope, len_of_data); - skb->mac.raw=skb->data; skb->dev=dev; + skb_reset_mac_header(skb); skb->protocol=htons(ETH_P_IP); skb->ip_summed=CHECKSUM_UNNECESSARY; privptr->stats.rx_packets++; diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c index 0d6d5fcc128..b20fd068173 100644 --- a/drivers/s390/net/ctcmain.c +++ b/drivers/s390/net/ctcmain.c @@ -455,7 +455,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb) return; } skb_put(pskb, header->length); - pskb->mac.raw = pskb->data; + skb_reset_mac_header(pskb); len -= header->length; skb = dev_alloc_skb(pskb->len); if (!skb) { @@ -472,8 +472,9 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb) privptr->stats.rx_dropped++; return; } - memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len); - skb->mac.raw = skb->data; + skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len), + pskb->len); + skb_reset_mac_header(skb); skb->dev = pskb->dev; skb->protocol = pskb->protocol; pskb->ip_summed = CHECKSUM_UNNECESSARY; @@ -706,7 +707,8 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg) spin_unlock(&ch->collect_lock); return; } - ch->trans_skb->tail = ch->trans_skb->data = ch->trans_skb_data; + ch->trans_skb->data = ch->trans_skb_data; + skb_reset_tail_pointer(ch->trans_skb); ch->trans_skb->len = 0; if (ch->prof.maxmulti < (ch->collect_len + 2)) ch->prof.maxmulti = ch->collect_len + 2; @@ -715,8 +717,9 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg) *((__u16 *) skb_put(ch->trans_skb, 2)) = ch->collect_len + 2; i = 0; while ((skb = skb_dequeue(&ch->collect_queue))) { - memcpy(skb_put(ch->trans_skb, skb->len), skb->data, - skb->len); + skb_copy_from_linear_data(skb, skb_put(ch->trans_skb, + skb->len), + skb->len); privptr->stats.tx_packets++; privptr->stats.tx_bytes += skb->len - LL_HEADER_LENGTH; atomic_dec(&skb->users); @@ -831,7 +834,8 @@ ch_action_rx(fsm_instance * fi, int event, void *arg) ctc_unpack_skb(ch, skb); } again: - skb->data = skb->tail = ch->trans_skb_data; + skb->data = ch->trans_skb_data; + skb_reset_tail_pointer(skb); skb->len = 0; if (ctc_checkalloc_buffer(ch, 1)) return; @@ 
-1638,21 +1642,19 @@ add_channel(struct ccw_device *cdev, enum channel_types type) struct channel *ch; DBF_TEXT(trace, 2, __FUNCTION__); - if ((ch = - (struct channel *) kmalloc(sizeof (struct channel), - GFP_KERNEL)) == NULL) { + ch = kzalloc(sizeof(struct channel), GFP_KERNEL); + if (!ch) { ctc_pr_warn("ctc: Out of memory in add_channel\n"); return -1; } - memset(ch, 0, sizeof (struct channel)); - if ((ch->ccw = kmalloc(8*sizeof(struct ccw1), - GFP_KERNEL | GFP_DMA)) == NULL) { + /* assure all flags and counters are reset */ + ch->ccw = kzalloc(8 * sizeof(struct ccw1), GFP_KERNEL | GFP_DMA); + if (!ch->ccw) { kfree(ch); ctc_pr_warn("ctc: Out of memory in add_channel\n"); return -1; } - memset(ch->ccw, 0, 8*sizeof(struct ccw1)); // assure all flags and counters are reset /** * "static" ccws are used in the following way: @@ -1692,15 +1694,14 @@ add_channel(struct ccw_device *cdev, enum channel_types type) return -1; } fsm_newstate(ch->fsm, CH_STATE_IDLE); - if ((ch->irb = kmalloc(sizeof (struct irb), - GFP_KERNEL)) == NULL) { + ch->irb = kzalloc(sizeof(struct irb), GFP_KERNEL); + if (!ch->irb) { ctc_pr_warn("ctc: Out of memory in add_channel\n"); kfree_fsm(ch->fsm); kfree(ch->ccw); kfree(ch); return -1; } - memset(ch->irb, 0, sizeof (struct irb)); while (*c && less_than((*c)->id, ch->id)) c = &(*c)->next; if (*c && (!strncmp((*c)->id, ch->id, CTC_ID_SIZE))) { @@ -2226,7 +2227,8 @@ transmit_skb(struct channel *ch, struct sk_buff *skb) * IDAL support in CTC is broken, so we have to * care about skb's above 2G ourselves. */ - hi = ((unsigned long) skb->tail + LL_HEADER_LENGTH) >> 31; + hi = ((unsigned long)skb_tail_pointer(skb) + + LL_HEADER_LENGTH) >> 31; if (hi) { nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA); if (!nskb) { @@ -2262,11 +2264,12 @@ transmit_skb(struct channel *ch, struct sk_buff *skb) return -EBUSY; } - ch->trans_skb->tail = ch->trans_skb->data; + skb_reset_tail_pointer(ch->trans_skb); ch->trans_skb->len = 0; ch->ccw[1].count = skb->len; - memcpy(skb_put(ch->trans_skb, skb->len), skb->data, - skb->len); + skb_copy_from_linear_data(skb, skb_put(ch->trans_skb, + skb->len), + skb->len); atomic_dec(&skb->users); dev_kfree_skb_irq(skb); ccw_idx = 0; @@ -2745,14 +2748,13 @@ ctc_probe_device(struct ccwgroup_device *cgdev) if (!get_device(&cgdev->dev)) return -ENODEV; - priv = kmalloc(sizeof (struct ctc_priv), GFP_KERNEL); + priv = kzalloc(sizeof(struct ctc_priv), GFP_KERNEL); if (!priv) { ctc_pr_err("%s: Out of memory\n", __func__); put_device(&cgdev->dev); return -ENOMEM; } - memset(priv, 0, sizeof (struct ctc_priv)); rc = ctc_add_files(&cgdev->dev); if (rc) { kfree(priv); @@ -2793,10 +2795,9 @@ ctc_init_netdevice(struct net_device * dev, int alloc_device, DBF_TEXT(setup, 3, __FUNCTION__); if (alloc_device) { - dev = kmalloc(sizeof (struct net_device), GFP_KERNEL); + dev = kzalloc(sizeof(struct net_device), GFP_KERNEL); if (!dev) return NULL; - memset(dev, 0, sizeof (struct net_device)); } dev->priv = privptr; diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index ecca1046714..08a994fdd1a 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1576,7 +1576,7 @@ __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, header->offset = card->tx_buffer->count; header->type = card->lan_type; header->slot = card->portno; - memcpy(header + 1, skb->data, skb->len); + skb_copy_from_linear_data(skb, header + 1, skb->len); spin_unlock(&card->lock); card->stats.tx_bytes += skb->len; card->stats.tx_packets++; @@ -1784,7 +1784,6 @@ lcs_get_skb(struct lcs_card 
*card, char *skb_data, unsigned int skb_len) card->stats.rx_dropped++; return; } - skb->dev = card->dev; memcpy(skb_put(skb, skb_len), skb_data, skb_len); skb->protocol = card->lan_type_trans(skb, card->dev); card->stats.rx_bytes += skb_len; diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 594320ca1b7..e10e85e85c8 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -635,7 +635,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn, return; } skb_put(pskb, header->next); - pskb->mac.raw = pskb->data; + skb_reset_mac_header(pskb); skb = dev_alloc_skb(pskb->len); if (!skb) { PRINT_WARN("%s Out of memory in netiucv_unpack_skb\n", @@ -645,8 +645,9 @@ static void netiucv_unpack_skb(struct iucv_connection *conn, privptr->stats.rx_dropped++; return; } - memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len); - skb->mac.raw = skb->data; + skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len), + pskb->len); + skb_reset_mac_header(skb); skb->dev = pskb->dev; skb->protocol = pskb->protocol; pskb->ip_summed = CHECKSUM_UNNECESSARY; @@ -689,7 +690,8 @@ static void conn_action_rx(fsm_instance *fi, int event, void *arg) msg->length, conn->max_buffsize); return; } - conn->rx_buff->data = conn->rx_buff->tail = conn->rx_buff->head; + conn->rx_buff->data = conn->rx_buff->head; + skb_reset_tail_pointer(conn->rx_buff); conn->rx_buff->len = 0; rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data, msg->length, NULL); @@ -735,14 +737,17 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg) } } } - conn->tx_buff->data = conn->tx_buff->tail = conn->tx_buff->head; + conn->tx_buff->data = conn->tx_buff->head; + skb_reset_tail_pointer(conn->tx_buff); conn->tx_buff->len = 0; spin_lock_irqsave(&conn->collect_lock, saveflags); while ((skb = skb_dequeue(&conn->collect_queue))) { header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN; memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN); - memcpy(skb_put(conn->tx_buff, skb->len), skb->data, skb->len); + skb_copy_from_linear_data(skb, + skb_put(conn->tx_buff, skb->len), + skb->len); txbytes += skb->len; txpackets++; stat_maxcq++; @@ -1164,8 +1169,8 @@ static int netiucv_transmit_skb(struct iucv_connection *conn, * Copy the skb to a new allocated skb in lowmem only if the * data is located above 2G in memory or tailroom is < 2. 
*/ - unsigned long hi = - ((unsigned long)(skb->tail + NETIUCV_HDRLEN)) >> 31; + unsigned long hi = ((unsigned long)(skb_tail_pointer(skb) + + NETIUCV_HDRLEN)) >> 31; int copied = 0; if (hi || (skb_tailroom(skb) < 2)) { nskb = alloc_skb(skb->len + NETIUCV_HDRLEN + diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c index 7c735e1fe06..dd7034fbfff 100644 --- a/drivers/s390/net/qeth_eddp.c +++ b/drivers/s390/net/qeth_eddp.c @@ -267,7 +267,8 @@ qeth_eddp_copy_data_tcp(char *dst, struct qeth_eddp_data *eddp, int len, QETH_DBF_TEXT(trace, 5, "eddpcdtc"); if (skb_shinfo(eddp->skb)->nr_frags == 0) { - memcpy(dst, eddp->skb->data + eddp->skb_offset, len); + skb_copy_from_linear_data_offset(eddp->skb, eddp->skb_offset, + dst, len); *hcsum = csum_partial(eddp->skb->data + eddp->skb_offset, len, *hcsum); eddp->skb_offset += len; @@ -416,7 +417,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx, eddp->skb_offset += VLAN_HLEN; #endif /* CONFIG_QETH_VLAN */ } - tcph = eddp->skb->h.th; + tcph = tcp_hdr(eddp->skb); while (eddp->skb_offset < eddp->skb->len) { data_len = min((int)skb_shinfo(eddp->skb)->gso_size, (int)(eddp->skb->len - eddp->skb_offset)); @@ -473,20 +474,24 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx, QETH_DBF_TEXT(trace, 5, "eddpficx"); /* create our segmentation headers and copy original headers */ if (skb->protocol == htons(ETH_P_IP)) - eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.iph, - skb->nh.iph->ihl*4, - (u8 *)skb->h.th, skb->h.th->doff*4); + eddp = qeth_eddp_create_eddp_data(qhdr, + skb_network_header(skb), + ip_hdrlen(skb), + skb_transport_header(skb), + tcp_hdrlen(skb)); else - eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h, - sizeof(struct ipv6hdr), - (u8 *)skb->h.th, skb->h.th->doff*4); + eddp = qeth_eddp_create_eddp_data(qhdr, + skb_network_header(skb), + sizeof(struct ipv6hdr), + skb_transport_header(skb), + tcp_hdrlen(skb)); if (eddp == NULL) { QETH_DBF_TEXT(trace, 2, "eddpfcnm"); return -ENOMEM; } if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) { - skb->mac.raw = (skb->data) + sizeof(struct qeth_hdr); + skb_set_mac_header(skb, sizeof(struct qeth_hdr)); memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN); #ifdef CONFIG_QETH_VLAN if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) { @@ -590,12 +595,13 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb, QETH_DBF_TEXT(trace, 5, "creddpct"); if (skb->protocol == htons(ETH_P_IP)) ctx = qeth_eddp_create_context_generic(card, skb, - sizeof(struct qeth_hdr) + skb->nh.iph->ihl*4 + - skb->h.th->doff*4); + (sizeof(struct qeth_hdr) + + ip_hdrlen(skb) + + tcp_hdrlen(skb))); else if (skb->protocol == htons(ETH_P_IPV6)) ctx = qeth_eddp_create_context_generic(card, skb, sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) + - skb->h.th->doff*4); + tcp_hdrlen(skb)); else QETH_DBF_TEXT(trace, 2, "cetcpinv"); diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index d8a86f5af37..ad7792dc1a0 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -2278,7 +2278,7 @@ qeth_type_trans(struct sk_buff *skb, struct net_device *dev) (card->info.link_type == QETH_LINK_TYPE_LANE_TR)) return tr_type_trans(skb,dev); #endif /* CONFIG_TR */ - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN ); eth = eth_hdr(skb); @@ -2306,9 +2306,9 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb, struct iphdr *ip_hdr; QETH_DBF_TEXT(trace,5,"skbfktr"); - skb->mac.raw = skb->data - 
QETH_FAKE_LL_LEN_TR; + skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_TR); /* this is a fake ethernet header */ - fake_hdr = (struct trh_hdr *) skb->mac.raw; + fake_hdr = tr_hdr(skb); /* the destination MAC address */ switch (skb->pkt_type){ @@ -2359,9 +2359,9 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb, struct iphdr *ip_hdr; QETH_DBF_TEXT(trace,5,"skbfketh"); - skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH; + skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_ETH); /* this is a fake ethernet header */ - fake_hdr = (struct ethhdr *) skb->mac.raw; + fake_hdr = eth_hdr(skb); /* the destination MAC address */ switch (skb->pkt_type){ @@ -2461,7 +2461,7 @@ qeth_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, if (card->options.fake_ll) qeth_rebuild_skb_fake_ll(card, skb, hdr); else - skb->mac.raw = skb->data; + skb_reset_mac_header(skb); skb->ip_summed = card->options.checksum_type; if (card->options.checksum_type == HW_CHECKSUMMING){ if ( (hdr->hdr.l3.ext_flags & @@ -2501,7 +2501,8 @@ qeth_process_inbound_buffer(struct qeth_card *card, vlan_tag = qeth_rebuild_skb(card, skb, hdr); else { /*in case of OSN*/ skb_push(skb, sizeof(struct qeth_hdr)); - memcpy(skb->data, hdr, sizeof(struct qeth_hdr)); + skb_copy_to_linear_data(skb, hdr, + sizeof(struct qeth_hdr)); } /* is device UP ? */ if (!(card->dev->flags & IFF_UP)){ @@ -3778,9 +3779,11 @@ qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb) } /* try something else */ if (skb->protocol == ETH_P_IPV6) - return (skb->nh.raw[24] == 0xff) ? RTN_MULTICAST : 0; + return (skb_network_header(skb)[24] == 0xff) ? + RTN_MULTICAST : 0; else if (skb->protocol == ETH_P_IP) - return ((skb->nh.raw[16] & 0xf0) == 0xe0) ? RTN_MULTICAST : 0; + return ((skb_network_header(skb)[16] & 0xf0) == 0xe0) ? + RTN_MULTICAST : 0; /* ... */ if (!memcmp(skb->data, skb->dev->broadcast, 6)) return RTN_BROADCAST; @@ -3818,18 +3821,20 @@ qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb, return card->info.is_multicast_different & (card->qdio.no_out_queues - 1); if (card->qdio.do_prio_queueing && (ipv == 4)) { + const u8 tos = ip_hdr(skb)->tos; + if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_TOS){ - if (skb->nh.iph->tos & IP_TOS_NOTIMPORTANT) + if (tos & IP_TOS_NOTIMPORTANT) return 3; - if (skb->nh.iph->tos & IP_TOS_HIGHRELIABILITY) + if (tos & IP_TOS_HIGHRELIABILITY) return 2; - if (skb->nh.iph->tos & IP_TOS_HIGHTHROUGHPUT) + if (tos & IP_TOS_HIGHTHROUGHPUT) return 1; - if (skb->nh.iph->tos & IP_TOS_LOWDELAY) + if (tos & IP_TOS_LOWDELAY) return 0; } if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_PREC) - return 3 - (skb->nh.iph->tos >> 6); + return 3 - (tos >> 6); } else if (card->qdio.do_prio_queueing && (ipv == 6)) { /* TODO: IPv6!!! */ } @@ -3866,9 +3871,9 @@ __qeth_prepare_skb(struct qeth_card *card, struct sk_buff *skb, int ipv) * memcpys instead of one memmove to save cycles. 
*/ skb_push(skb, VLAN_HLEN); - memcpy(skb->data, skb->data + 4, 4); - memcpy(skb->data + 4, skb->data + 8, 4); - memcpy(skb->data + 8, skb->data + 12, 4); + skb_copy_to_linear_data(skb, skb->data + 4, 4); + skb_copy_to_linear_data_offset(skb, 4, skb->data + 8, 4); + skb_copy_to_linear_data_offset(skb, 8, skb->data + 12, 4); tag = (u16 *)(skb->data + 12); /* * first two bytes = ETH_P_8021Q (0x8100) @@ -4039,7 +4044,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, *((u32 *) skb->dst->neighbour->primary_key); } else { /* fill in destination address used in ip header */ - *((u32 *) (&hdr->hdr.l3.dest_addr[12])) = skb->nh.iph->daddr; + *((u32 *)(&hdr->hdr.l3.dest_addr[12])) = + ip_hdr(skb)->daddr; } } else if (ipv == 6) { /* IPv6 or passthru */ hdr->hdr.l3.flags = qeth_get_qeth_hdr_flags6(cast_type); @@ -4048,7 +4054,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, skb->dst->neighbour->primary_key, 16); } else { /* fill in destination address used in ip header */ - memcpy(hdr->hdr.l3.dest_addr, &skb->nh.ipv6h->daddr, 16); + memcpy(hdr->hdr.l3.dest_addr, + &ipv6_hdr(skb)->daddr, 16); } } else { /* passthrough */ if((skb->dev->type == ARPHRD_IEEE802_TR) && diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h index 14504afb044..c20e923cf9a 100644 --- a/drivers/s390/net/qeth_tso.h +++ b/drivers/s390/net/qeth_tso.h @@ -40,8 +40,8 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb) QETH_DBF_TEXT(trace, 5, "tsofhdr"); hdr = (struct qeth_hdr_tso *) skb->data; - iph = skb->nh.iph; - tcph = skb->h.th; + iph = ip_hdr(skb); + tcph = tcp_hdr(skb); /*fix header to TSO values ...*/ hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO; /*set values which are fix for the first approach ...*/ @@ -63,13 +63,9 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb) static inline void qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb) { - struct iphdr *iph; - struct ipv6hdr *ip6h; - struct tcphdr *tcph; - - iph = skb->nh.iph; - ip6h = skb->nh.ipv6h; - tcph = skb->h.th; + struct iphdr *iph = ip_hdr(skb); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct tcphdr *tcph = tcp_hdr(skb); tcph->check = 0; if (skb->protocol == ETH_P_IPV6) { diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 806bb1a921e..644a06eba82 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -21,6 +21,7 @@ #include "cio/cio.h" #include "cio/chsc.h" #include "cio/css.h" +#include "cio/chp.h" #include "s390mach.h" static struct semaphore m_sem; @@ -44,14 +45,13 @@ static int s390_collect_crw_info(void *param) { struct crw crw[2]; - int ccode, ret, slow; + int ccode; struct semaphore *sem; unsigned int chain; sem = (struct semaphore *)param; repeat: down_interruptible(sem); - slow = 0; chain = 0; while (1) { if (unlikely(chain > 1)) { @@ -84,9 +84,8 @@ repeat: /* Check for overflows. */ if (crw[chain].oflw) { pr_debug("%s: crw overflow detected!\n", __FUNCTION__); - css_reiterate_subchannels(); + css_schedule_eval_all(); chain = 0; - slow = 1; continue; } switch (crw[chain].rsc) { @@ -94,10 +93,7 @@ repeat: if (crw[0].chn && !chain) break; pr_debug("source is subchannel %04X\n", crw[0].rsid); - ret = css_process_crw (crw[0].rsid, - chain ? crw[1].rsid : 0); - if (ret == -EAGAIN) - slow = 1; + css_process_crw(crw[0].rsid, chain ? crw[1].rsid : 0); break; case CRW_RSC_MONITOR: pr_debug("source is monitoring facility\n"); @@ -116,28 +112,23 @@ repeat: } switch (crw[0].erc) { case CRW_ERC_IPARM: /* Path has come. 
*/ - ret = chp_process_crw(crw[0].rsid, 1); + chp_process_crw(crw[0].rsid, 1); break; case CRW_ERC_PERRI: /* Path has gone. */ case CRW_ERC_PERRN: - ret = chp_process_crw(crw[0].rsid, 0); + chp_process_crw(crw[0].rsid, 0); break; default: pr_debug("Don't know how to handle erc=%x\n", crw[0].erc); - ret = 0; } - if (ret == -EAGAIN) - slow = 1; break; case CRW_RSC_CONFIG: pr_debug("source is configuration-alert facility\n"); break; case CRW_RSC_CSS: pr_debug("source is channel subsystem\n"); - ret = chsc_process_crw(); - if (ret == -EAGAIN) - slow = 1; + chsc_process_crw(); break; default: pr_debug("unknown source\n"); @@ -146,8 +137,6 @@ repeat: /* chain is always 0 or 1 here. */ chain = crw[chain].chn ? chain + 1 : 0; } - if (slow) - queue_work(slow_path_wq, &slow_path_work); goto repeat; return 0; } diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c index 090743d2f91..19343f9675c 100644 --- a/drivers/s390/sysinfo.c +++ b/drivers/s390/sysinfo.c @@ -357,6 +357,24 @@ static __init int create_proc_sysinfo(void) __initcall(create_proc_sysinfo); +int get_cpu_capability(unsigned int *capability) +{ + struct sysinfo_1_2_2 *info; + int rc; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return -ENOMEM; + rc = stsi(info, 1, 2, 2); + if (rc == -ENOSYS) + goto out; + rc = 0; + *capability = info->capability; +out: + free_page((unsigned long) info); + return rc; +} + /* * CPU capability might have changed. Therefore recalculate loops_per_jiffy. */ diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 2cea4f5d208..f2be2ead874 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -726,7 +726,7 @@ static struct miscdevice envctrl_dev = { * Return: None. */ static void envctrl_set_mon(struct i2c_child_t *pchild, - char *chnl_desc, + const char *chnl_desc, int chnl_no) { /* Firmware only has temperature type. It does not distinguish @@ -763,8 +763,8 @@ static void envctrl_set_mon(struct i2c_child_t *pchild, static void envctrl_init_adc(struct i2c_child_t *pchild, struct device_node *dp) { int i = 0, len; - char *pos; - unsigned int *pval; + const char *pos; + const unsigned int *pval; /* Firmware describe channels into a stream separated by a '\0'. */ pos = of_get_property(dp, "channels-description", &len); @@ -859,7 +859,7 @@ static void envctrl_init_i2c_child(struct linux_ebus_child *edev_child, { int len, i, tbls_size = 0; struct device_node *dp = edev_child->prom_node; - void *pval; + const void *pval; /* Get device address. */ pval = of_get_property(dp, "reg", &len); diff --git a/drivers/sbus/char/flash.c b/drivers/sbus/char/flash.c index 6e99507aeb1..262f01e6859 100644 --- a/drivers/sbus/char/flash.c +++ b/drivers/sbus/char/flash.c @@ -190,7 +190,7 @@ static int __init flash_init(void) } if (!sdev) { #ifdef CONFIG_PCI - struct linux_prom_registers *ebus_regs; + const struct linux_prom_registers *ebus_regs; for_each_ebus(ebus) { for_each_ebusdev(edev, ebus) { diff --git a/drivers/sbus/char/openprom.c b/drivers/sbus/char/openprom.c index 5041c9dfbe3..fbfeb89a6f3 100644 --- a/drivers/sbus/char/openprom.c +++ b/drivers/sbus/char/openprom.c @@ -44,7 +44,6 @@ #include <asm/openpromio.h> #ifdef CONFIG_PCI #include <linux/pci.h> -#include <asm/pbm.h> #endif MODULE_AUTHOR("Thomas K. Dyas (tdyas@noc.rutgers.edu) and Eddie C. 
Dost (ecd@skynet.be)"); @@ -141,7 +140,7 @@ static int copyout(void __user *info, struct openpromio *opp, int len) static int opromgetprop(void __user *argp, struct device_node *dp, struct openpromio *op, int bufsize) { - void *pval; + const void *pval; int len; if (!dp || @@ -248,18 +247,17 @@ static int oprompci2node(void __user *argp, struct device_node *dp, struct openp if (bufsize >= 2*sizeof(int)) { #ifdef CONFIG_PCI struct pci_dev *pdev; - struct pcidev_cookie *pcp; + struct device_node *dp; + pdev = pci_get_bus_and_slot (((int *) op->oprom_array)[0], ((int *) op->oprom_array)[1]); - pcp = pdev->sysdata; - if (pcp != NULL) { - dp = pcp->prom_node; - data->current_node = dp; - *((int *)op->oprom_array) = dp->node; - op->oprom_size = sizeof(int); - err = copyout(argp, op, bufsize + sizeof(int)); - } + dp = pci_device_to_OF_node(pdev); + data->current_node = dp; + *((int *)op->oprom_array) = dp->node; + op->oprom_size = sizeof(int); + err = copyout(argp, op, bufsize + sizeof(int)); + pci_dev_put(pdev); #endif } @@ -410,7 +408,7 @@ static int opiocget(void __user *argp, DATA *data) struct opiocdesc op; struct device_node *dp; char *str; - void *pval; + const void *pval; int err, len; if (copy_from_user(&op, argp, sizeof(op))) diff --git a/drivers/sbus/sbus.c b/drivers/sbus/sbus.c index 6349dd617f8..eee590a51d8 100644 --- a/drivers/sbus/sbus.c +++ b/drivers/sbus/sbus.c @@ -35,7 +35,7 @@ struct sbus_bus *sbus_root; static void __init fill_sbus_device(struct device_node *dp, struct sbus_dev *sdev) { unsigned long base; - void *pval; + const void *pval; int len, err; sdev->prom_node = dp->node; @@ -86,7 +86,7 @@ static void __init fill_sbus_device(struct device_node *dp, struct sbus_dev *sde static void __init sbus_bus_ranges_init(struct device_node *dp, struct sbus_bus *sbus) { - void *pval; + const void *pval; int len; pval = of_get_property(dp, "ranges", &len); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4cd280e8696..fcc4cb6c7f4 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1763,9 +1763,15 @@ config SUN3X_ESP The ESP was an on-board SCSI controller used on Sun 3/80 machines. Say Y here to compile in support for it. +config SCSI_ESP_CORE + tristate "ESP Scsi Driver Core" + depends on SCSI + select SCSI_SPI_ATTRS + config SCSI_SUNESP tristate "Sparc ESP Scsi Driver" depends on SBUS && SCSI + select SCSI_ESP_CORE help This is the driver for the Sun ESP SCSI host adapter. The ESP chipset is present in most SPARC SBUS-based computers. diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 79ecf4ebe6e..70cff4c599d 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -106,7 +106,8 @@ obj-$(CONFIG_MEGARAID_LEGACY) += megaraid.o obj-$(CONFIG_MEGARAID_NEWGEN) += megaraid/ obj-$(CONFIG_MEGARAID_SAS) += megaraid/ obj-$(CONFIG_SCSI_ACARD) += atp870u.o -obj-$(CONFIG_SCSI_SUNESP) += esp.o +obj-$(CONFIG_SCSI_ESP_CORE) += esp_scsi.o +obj-$(CONFIG_SCSI_SUNESP) += sun_esp.o obj-$(CONFIG_SCSI_GDTH) += gdth.o obj-$(CONFIG_SCSI_INITIO) += initio.o obj-$(CONFIG_SCSI_INIA100) += a100u2w.o diff --git a/drivers/scsi/esp.c b/drivers/scsi/esp.c deleted file mode 100644 index 2c2fe80bc42..00000000000 --- a/drivers/scsi/esp.c +++ /dev/null @@ -1,4394 +0,0 @@ -/* esp.c: ESP Sun SCSI driver. - * - * Copyright (C) 1995, 1998, 2006 David S. Miller (davem@davemloft.net) - */ - -/* TODO: - * - * 1) Maybe disable parity checking in config register one for SCSI1 - * targets. 
(Gilmore says parity error on the SBus can lock up - * old sun4c's) - * 2) Add support for DMA2 pipelining. - * 3) Add tagged queueing. - */ - -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/blkdev.h> -#include <linux/proc_fs.h> -#include <linux/stat.h> -#include <linux/init.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <linux/module.h> - -#include "esp.h" - -#include <asm/sbus.h> -#include <asm/dma.h> -#include <asm/system.h> -#include <asm/ptrace.h> -#include <asm/pgtable.h> -#include <asm/oplib.h> -#include <asm/io.h> -#include <asm/irq.h> -#ifndef __sparc_v9__ -#include <asm/machines.h> -#include <asm/idprom.h> -#endif - -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_device.h> -#include <scsi/scsi_eh.h> -#include <scsi/scsi_host.h> -#include <scsi/scsi_tcq.h> - -#define DRV_VERSION "1.101" - -#define DEBUG_ESP -/* #define DEBUG_ESP_HME */ -/* #define DEBUG_ESP_DATA */ -/* #define DEBUG_ESP_QUEUE */ -/* #define DEBUG_ESP_DISCONNECT */ -/* #define DEBUG_ESP_STATUS */ -/* #define DEBUG_ESP_PHASES */ -/* #define DEBUG_ESP_WORKBUS */ -/* #define DEBUG_STATE_MACHINE */ -/* #define DEBUG_ESP_CMDS */ -/* #define DEBUG_ESP_IRQS */ -/* #define DEBUG_SDTR */ -/* #define DEBUG_ESP_SG */ - -/* Use the following to sprinkle debugging messages in a way which - * suits you if combinations of the above become too verbose when - * trying to track down a specific problem. - */ -/* #define DEBUG_ESP_MISC */ - -#if defined(DEBUG_ESP) -#define ESPLOG(foo) printk foo -#else -#define ESPLOG(foo) -#endif /* (DEBUG_ESP) */ - -#if defined(DEBUG_ESP_HME) -#define ESPHME(foo) printk foo -#else -#define ESPHME(foo) -#endif - -#if defined(DEBUG_ESP_DATA) -#define ESPDATA(foo) printk foo -#else -#define ESPDATA(foo) -#endif - -#if defined(DEBUG_ESP_QUEUE) -#define ESPQUEUE(foo) printk foo -#else -#define ESPQUEUE(foo) -#endif - -#if defined(DEBUG_ESP_DISCONNECT) -#define ESPDISC(foo) printk foo -#else -#define ESPDISC(foo) -#endif - -#if defined(DEBUG_ESP_STATUS) -#define ESPSTAT(foo) printk foo -#else -#define ESPSTAT(foo) -#endif - -#if defined(DEBUG_ESP_PHASES) -#define ESPPHASE(foo) printk foo -#else -#define ESPPHASE(foo) -#endif - -#if defined(DEBUG_ESP_WORKBUS) -#define ESPBUS(foo) printk foo -#else -#define ESPBUS(foo) -#endif - -#if defined(DEBUG_ESP_IRQS) -#define ESPIRQ(foo) printk foo -#else -#define ESPIRQ(foo) -#endif - -#if defined(DEBUG_SDTR) -#define ESPSDTR(foo) printk foo -#else -#define ESPSDTR(foo) -#endif - -#if defined(DEBUG_ESP_MISC) -#define ESPMISC(foo) printk foo -#else -#define ESPMISC(foo) -#endif - -/* Command phase enumeration. */ -enum { - not_issued = 0x00, /* Still in the issue_SC queue. */ - - /* Various forms of selecting a target. */ -#define in_slct_mask 0x10 - in_slct_norm = 0x10, /* ESP is arbitrating, normal selection */ - in_slct_stop = 0x11, /* ESP will select, then stop with IRQ */ - in_slct_msg = 0x12, /* select, then send a message */ - in_slct_tag = 0x13, /* select and send tagged queue msg */ - in_slct_sneg = 0x14, /* select and acquire sync capabilities */ - - /* Any post selection activity. 
*/ -#define in_phases_mask 0x20 - in_datain = 0x20, /* Data is transferring from the bus */ - in_dataout = 0x21, /* Data is transferring to the bus */ - in_data_done = 0x22, /* Last DMA data operation done (maybe) */ - in_msgin = 0x23, /* Eating message from target */ - in_msgincont = 0x24, /* Eating more msg bytes from target */ - in_msgindone = 0x25, /* Decide what to do with what we got */ - in_msgout = 0x26, /* Sending message to target */ - in_msgoutdone = 0x27, /* Done sending msg out */ - in_cmdbegin = 0x28, /* Sending cmd after abnormal selection */ - in_cmdend = 0x29, /* Done sending slow cmd */ - in_status = 0x2a, /* Was in status phase, finishing cmd */ - in_freeing = 0x2b, /* freeing the bus for cmd cmplt or disc */ - in_the_dark = 0x2c, /* Don't know what bus phase we are in */ - - /* Special states, ie. not normal bus transitions... */ -#define in_spec_mask 0x80 - in_abortone = 0x80, /* Aborting one command currently */ - in_abortall = 0x81, /* Blowing away all commands we have */ - in_resetdev = 0x82, /* SCSI target reset in progress */ - in_resetbus = 0x83, /* SCSI bus reset in progress */ - in_tgterror = 0x84, /* Target did something stupid */ -}; - -enum { - /* Zero has special meaning, see skipahead[12]. */ -/*0*/ do_never, - -/*1*/ do_phase_determine, -/*2*/ do_reset_bus, -/*3*/ do_reset_complete, -/*4*/ do_work_bus, -/*5*/ do_intr_end -}; - -/* Forward declarations. */ -static irqreturn_t esp_intr(int irq, void *dev_id); - -/* Debugging routines */ -struct esp_cmdstrings { - u8 cmdchar; - char *text; -} esp_cmd_strings[] = { - /* Miscellaneous */ - { ESP_CMD_NULL, "ESP_NOP", }, - { ESP_CMD_FLUSH, "FIFO_FLUSH", }, - { ESP_CMD_RC, "RSTESP", }, - { ESP_CMD_RS, "RSTSCSI", }, - /* Disconnected State Group */ - { ESP_CMD_RSEL, "RESLCTSEQ", }, - { ESP_CMD_SEL, "SLCTNATN", }, - { ESP_CMD_SELA, "SLCTATN", }, - { ESP_CMD_SELAS, "SLCTATNSTOP", }, - { ESP_CMD_ESEL, "ENSLCTRESEL", }, - { ESP_CMD_DSEL, "DISSELRESEL", }, - { ESP_CMD_SA3, "SLCTATN3", }, - { ESP_CMD_RSEL3, "RESLCTSEQ", }, - /* Target State Group */ - { ESP_CMD_SMSG, "SNDMSG", }, - { ESP_CMD_SSTAT, "SNDSTATUS", }, - { ESP_CMD_SDATA, "SNDDATA", }, - { ESP_CMD_DSEQ, "DISCSEQ", }, - { ESP_CMD_TSEQ, "TERMSEQ", }, - { ESP_CMD_TCCSEQ, "TRGTCMDCOMPSEQ", }, - { ESP_CMD_DCNCT, "DISC", }, - { ESP_CMD_RMSG, "RCVMSG", }, - { ESP_CMD_RCMD, "RCVCMD", }, - { ESP_CMD_RDATA, "RCVDATA", }, - { ESP_CMD_RCSEQ, "RCVCMDSEQ", }, - /* Initiator State Group */ - { ESP_CMD_TI, "TRANSINFO", }, - { ESP_CMD_ICCSEQ, "INICMDSEQCOMP", }, - { ESP_CMD_MOK, "MSGACCEPTED", }, - { ESP_CMD_TPAD, "TPAD", }, - { ESP_CMD_SATN, "SATN", }, - { ESP_CMD_RATN, "RATN", }, -}; -#define NUM_ESP_COMMANDS ((sizeof(esp_cmd_strings)) / (sizeof(struct esp_cmdstrings))) - -/* Print textual representation of an ESP command */ -static inline void esp_print_cmd(u8 espcmd) -{ - u8 dma_bit = espcmd & ESP_CMD_DMA; - int i; - - espcmd &= ~dma_bit; - for (i = 0; i < NUM_ESP_COMMANDS; i++) - if (esp_cmd_strings[i].cmdchar == espcmd) - break; - if (i == NUM_ESP_COMMANDS) - printk("ESP_Unknown"); - else - printk("%s%s", esp_cmd_strings[i].text, - ((dma_bit) ? "+DMA" : "")); -} - -/* Print the status register's value */ -static inline void esp_print_statreg(u8 statreg) -{ - u8 phase; - - printk("STATUS<"); - phase = statreg & ESP_STAT_PMASK; - printk("%s,", (phase == ESP_DOP ? "DATA-OUT" : - (phase == ESP_DIP ? "DATA-IN" : - (phase == ESP_CMDP ? "COMMAND" : - (phase == ESP_STATP ? "STATUS" : - (phase == ESP_MOP ? "MSG-OUT" : - (phase == ESP_MIP ? 
"MSG_IN" : - "unknown"))))))); - if (statreg & ESP_STAT_TDONE) - printk("TRANS_DONE,"); - if (statreg & ESP_STAT_TCNT) - printk("TCOUNT_ZERO,"); - if (statreg & ESP_STAT_PERR) - printk("P_ERROR,"); - if (statreg & ESP_STAT_SPAM) - printk("SPAM,"); - if (statreg & ESP_STAT_INTR) - printk("IRQ,"); - printk(">"); -} - -/* Print the interrupt register's value */ -static inline void esp_print_ireg(u8 intreg) -{ - printk("INTREG< "); - if (intreg & ESP_INTR_S) - printk("SLCT_NATN "); - if (intreg & ESP_INTR_SATN) - printk("SLCT_ATN "); - if (intreg & ESP_INTR_RSEL) - printk("RSLCT "); - if (intreg & ESP_INTR_FDONE) - printk("FDONE "); - if (intreg & ESP_INTR_BSERV) - printk("BSERV "); - if (intreg & ESP_INTR_DC) - printk("DISCNCT "); - if (intreg & ESP_INTR_IC) - printk("ILL_CMD "); - if (intreg & ESP_INTR_SR) - printk("SCSI_BUS_RESET "); - printk(">"); -} - -/* Print the sequence step registers contents */ -static inline void esp_print_seqreg(u8 stepreg) -{ - stepreg &= ESP_STEP_VBITS; - printk("STEP<%s>", - (stepreg == ESP_STEP_ASEL ? "SLCT_ARB_CMPLT" : - (stepreg == ESP_STEP_SID ? "1BYTE_MSG_SENT" : - (stepreg == ESP_STEP_NCMD ? "NOT_IN_CMD_PHASE" : - (stepreg == ESP_STEP_PPC ? "CMD_BYTES_LOST" : - (stepreg == ESP_STEP_FINI4 ? "CMD_SENT_OK" : - "UNKNOWN")))))); -} - -static char *phase_string(int phase) -{ - switch (phase) { - case not_issued: - return "UNISSUED"; - case in_slct_norm: - return "SLCTNORM"; - case in_slct_stop: - return "SLCTSTOP"; - case in_slct_msg: - return "SLCTMSG"; - case in_slct_tag: - return "SLCTTAG"; - case in_slct_sneg: - return "SLCTSNEG"; - case in_datain: - return "DATAIN"; - case in_dataout: - return "DATAOUT"; - case in_data_done: - return "DATADONE"; - case in_msgin: - return "MSGIN"; - case in_msgincont: - return "MSGINCONT"; - case in_msgindone: - return "MSGINDONE"; - case in_msgout: - return "MSGOUT"; - case in_msgoutdone: - return "MSGOUTDONE"; - case in_cmdbegin: - return "CMDBEGIN"; - case in_cmdend: - return "CMDEND"; - case in_status: - return "STATUS"; - case in_freeing: - return "FREEING"; - case in_the_dark: - return "CLUELESS"; - case in_abortone: - return "ABORTONE"; - case in_abortall: - return "ABORTALL"; - case in_resetdev: - return "RESETDEV"; - case in_resetbus: - return "RESETBUS"; - case in_tgterror: - return "TGTERROR"; - default: - return "UNKNOWN"; - }; -} - -#ifdef DEBUG_STATE_MACHINE -static inline void esp_advance_phase(struct scsi_cmnd *s, int newphase) -{ - ESPLOG(("<%s>", phase_string(newphase))); - s->SCp.sent_command = s->SCp.phase; - s->SCp.phase = newphase; -} -#else -#define esp_advance_phase(__s, __newphase) \ - (__s)->SCp.sent_command = (__s)->SCp.phase; \ - (__s)->SCp.phase = (__newphase); -#endif - -#ifdef DEBUG_ESP_CMDS -static inline void esp_cmd(struct esp *esp, u8 cmd) -{ - esp->espcmdlog[esp->espcmdent] = cmd; - esp->espcmdent = (esp->espcmdent + 1) & 31; - sbus_writeb(cmd, esp->eregs + ESP_CMD); -} -#else -#define esp_cmd(__esp, __cmd) \ - sbus_writeb((__cmd), ((__esp)->eregs) + ESP_CMD) -#endif - -#define ESP_INTSOFF(__dregs) \ - sbus_writel(sbus_readl((__dregs)+DMA_CSR)&~(DMA_INT_ENAB), (__dregs)+DMA_CSR) -#define ESP_INTSON(__dregs) \ - sbus_writel(sbus_readl((__dregs)+DMA_CSR)|DMA_INT_ENAB, (__dregs)+DMA_CSR) -#define ESP_IRQ_P(__dregs) \ - (sbus_readl((__dregs)+DMA_CSR) & (DMA_HNDL_INTR|DMA_HNDL_ERROR)) - -/* How we use the various Linux SCSI data structures for operation. 
- * - * struct scsi_cmnd: - * - * We keep track of the synchronous capabilities of a target - * in the device member, using sync_min_period and - * sync_max_offset. These are the values we directly write - * into the ESP registers while running a command. If offset - * is zero the ESP will use asynchronous transfers. - * If the borken flag is set we assume we shouldn't even bother - * trying to negotiate for synchronous transfer as this target - * is really stupid. If we notice the target is dropping the - * bus, and we have been allowing it to disconnect, we clear - * the disconnect flag. - */ - - -/* Manipulation of the ESP command queues. Thanks to the aha152x driver - * and its author, Juergen E. Fischer, for the methods used here. - * Note that these are per-ESP queues, not global queues like - * the aha152x driver uses. - */ -static inline void append_SC(struct scsi_cmnd **SC, struct scsi_cmnd *new_SC) -{ - struct scsi_cmnd *end; - - new_SC->host_scribble = (unsigned char *) NULL; - if (!*SC) - *SC = new_SC; - else { - for (end=*SC;end->host_scribble;end=(struct scsi_cmnd *)end->host_scribble) - ; - end->host_scribble = (unsigned char *) new_SC; - } -} - -static inline void prepend_SC(struct scsi_cmnd **SC, struct scsi_cmnd *new_SC) -{ - new_SC->host_scribble = (unsigned char *) *SC; - *SC = new_SC; -} - -static inline struct scsi_cmnd *remove_first_SC(struct scsi_cmnd **SC) -{ - struct scsi_cmnd *ptr; - ptr = *SC; - if (ptr) - *SC = (struct scsi_cmnd *) (*SC)->host_scribble; - return ptr; -} - -static inline struct scsi_cmnd *remove_SC(struct scsi_cmnd **SC, int target, int lun) -{ - struct scsi_cmnd *ptr, *prev; - - for (ptr = *SC, prev = NULL; - ptr && ((ptr->device->id != target) || (ptr->device->lun != lun)); - prev = ptr, ptr = (struct scsi_cmnd *) ptr->host_scribble) - ; - if (ptr) { - if (prev) - prev->host_scribble=ptr->host_scribble; - else - *SC=(struct scsi_cmnd *)ptr->host_scribble; - } - return ptr; -} - -/* Resetting various pieces of the ESP scsi driver chipset/buses. */ -static void esp_reset_dma(struct esp *esp) -{ - int can_do_burst16, can_do_burst32, can_do_burst64; - int can_do_sbus64; - u32 tmp; - - can_do_burst16 = (esp->bursts & DMA_BURST16) != 0; - can_do_burst32 = (esp->bursts & DMA_BURST32) != 0; - can_do_burst64 = 0; - can_do_sbus64 = 0; - if (sbus_can_dma_64bit(esp->sdev)) - can_do_sbus64 = 1; - if (sbus_can_burst64(esp->sdev)) - can_do_burst64 = (esp->bursts & DMA_BURST64) != 0; - - /* Punt the DVMA into a known state. */ - if (esp->dma->revision != dvmahme) { - tmp = sbus_readl(esp->dregs + DMA_CSR); - sbus_writel(tmp | DMA_RST_SCSI, esp->dregs + DMA_CSR); - sbus_writel(tmp & ~DMA_RST_SCSI, esp->dregs + DMA_CSR); - } - switch (esp->dma->revision) { - case dvmahme: - /* This is the HME DVMA gate array. */ - - sbus_writel(DMA_RESET_FAS366, esp->dregs + DMA_CSR); - sbus_writel(DMA_RST_SCSI, esp->dregs + DMA_CSR); - - esp->prev_hme_dmacsr = (DMA_PARITY_OFF|DMA_2CLKS|DMA_SCSI_DISAB|DMA_INT_ENAB); - esp->prev_hme_dmacsr &= ~(DMA_ENABLE|DMA_ST_WRITE|DMA_BRST_SZ); - - if (can_do_burst64) - esp->prev_hme_dmacsr |= DMA_BRST64; - else if (can_do_burst32) - esp->prev_hme_dmacsr |= DMA_BRST32; - - if (can_do_sbus64) { - esp->prev_hme_dmacsr |= DMA_SCSI_SBUS64; - sbus_set_sbus64(esp->sdev, esp->bursts); - } - - /* This chip is horrible. 
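The append_SC()/prepend_SC()/remove_first_SC() helpers above thread a singly linked queue through each command's host_scribble field. A minimal standalone sketch of the same idea, with an explicit next pointer standing in for host_scribble (all names below are illustrative, not part of this patch):

#include <stdio.h>
#include <stddef.h>

struct cmd {
	int tag;
	struct cmd *next;	/* plays the role of host_scribble */
};

/* Walk to the tail and link the new entry, as append_SC() does. */
static void append_cmd(struct cmd **q, struct cmd *n)
{
	struct cmd *end;

	n->next = NULL;
	if (!*q) {
		*q = n;
		return;
	}
	for (end = *q; end->next; end = end->next)
		;
	end->next = n;
}

/* Pop the head, as remove_first_SC() does. */
static struct cmd *remove_first_cmd(struct cmd **q)
{
	struct cmd *head = *q;

	if (head)
		*q = head->next;
	return head;
}

int main(void)
{
	struct cmd a = { 1, NULL }, b = { 2, NULL };
	struct cmd *q = NULL, *p;

	append_cmd(&q, &a);
	append_cmd(&q, &b);
	while ((p = remove_first_cmd(&q)))
		printf("cmd %d\n", p->tag);	/* prints 1, then 2 */
	return 0;
}

Note that the driver prepends REQUEST_SENSE commands rather than appending them (see esp_queue() further down), so sense data is fetched before anything else touches the target.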
*/ - while (sbus_readl(esp->dregs + DMA_CSR) & DMA_PEND_READ) - udelay(1); - - sbus_writel(0, esp->dregs + DMA_CSR); - sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR); - - /* This is necessary to avoid having the SCSI channel - * engine lock up on us. - */ - sbus_writel(0, esp->dregs + DMA_ADDR); - - break; - case dvmarev2: - /* This is the gate array found in the sun4m - * NCR SBUS I/O subsystem. - */ - if (esp->erev != esp100) { - tmp = sbus_readl(esp->dregs + DMA_CSR); - sbus_writel(tmp | DMA_3CLKS, esp->dregs + DMA_CSR); - } - break; - case dvmarev3: - tmp = sbus_readl(esp->dregs + DMA_CSR); - tmp &= ~DMA_3CLKS; - tmp |= DMA_2CLKS; - if (can_do_burst32) { - tmp &= ~DMA_BRST_SZ; - tmp |= DMA_BRST32; - } - sbus_writel(tmp, esp->dregs + DMA_CSR); - break; - case dvmaesc1: - /* This is the DMA unit found on SCSI/Ether cards. */ - tmp = sbus_readl(esp->dregs + DMA_CSR); - tmp |= DMA_ADD_ENABLE; - tmp &= ~DMA_BCNT_ENAB; - if (!can_do_burst32 && can_do_burst16) { - tmp |= DMA_ESC_BURST; - } else { - tmp &= ~(DMA_ESC_BURST); - } - sbus_writel(tmp, esp->dregs + DMA_CSR); - break; - default: - break; - }; - ESP_INTSON(esp->dregs); -} - -/* Reset the ESP chip, _not_ the SCSI bus. */ -static void __init esp_reset_esp(struct esp *esp) -{ - u8 family_code, version; - int i; - - /* Now reset the ESP chip */ - esp_cmd(esp, ESP_CMD_RC); - esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA); - esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA); - - /* Reload the configuration registers */ - sbus_writeb(esp->cfact, esp->eregs + ESP_CFACT); - esp->prev_stp = 0; - sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP); - esp->prev_soff = 0; - sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF); - sbus_writeb(esp->neg_defp, esp->eregs + ESP_TIMEO); - - /* This is the only point at which it is reliable to read - * the ID-code for a fast ESP chip variants. - */ - esp->max_period = ((35 * esp->ccycle) / 1000); - if (esp->erev == fast) { - version = sbus_readb(esp->eregs + ESP_UID); - family_code = (version & 0xf8) >> 3; - if (family_code == 0x02) - esp->erev = fas236; - else if (family_code == 0x0a) - esp->erev = fashme; /* Version is usually '5'. */ - else - esp->erev = fas100a; - ESPMISC(("esp%d: FAST chip is %s (family=%d, version=%d)\n", - esp->esp_id, - (esp->erev == fas236) ? "fas236" : - ((esp->erev == fas100a) ? "fas100a" : - "fasHME"), family_code, (version & 7))); - - esp->min_period = ((4 * esp->ccycle) / 1000); - } else { - esp->min_period = ((5 * esp->ccycle) / 1000); - } - esp->max_period = (esp->max_period + 3)>>2; - esp->min_period = (esp->min_period + 3)>>2; - - sbus_writeb(esp->config1, esp->eregs + ESP_CFG1); - switch (esp->erev) { - case esp100: - /* nothing to do */ - break; - case esp100a: - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - break; - case esp236: - /* Slow 236 */ - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - esp->prev_cfg3 = esp->config3[0]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - break; - case fashme: - esp->config2 |= (ESP_CONFIG2_HME32 | ESP_CONFIG2_HMEFENAB); - /* fallthrough... 
*/ - case fas236: - /* Fast 236 or HME */ - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - for (i = 0; i < 16; i++) { - if (esp->erev == fashme) { - u8 cfg3; - - cfg3 = ESP_CONFIG3_FCLOCK | ESP_CONFIG3_OBPUSH; - if (esp->scsi_id >= 8) - cfg3 |= ESP_CONFIG3_IDBIT3; - esp->config3[i] |= cfg3; - } else { - esp->config3[i] |= ESP_CONFIG3_FCLK; - } - } - esp->prev_cfg3 = esp->config3[0]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - if (esp->erev == fashme) { - esp->radelay = 80; - } else { - if (esp->diff) - esp->radelay = 0; - else - esp->radelay = 96; - } - break; - case fas100a: - /* Fast 100a */ - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - for (i = 0; i < 16; i++) - esp->config3[i] |= ESP_CONFIG3_FCLOCK; - esp->prev_cfg3 = esp->config3[0]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - esp->radelay = 32; - break; - default: - panic("esp: what could it be... I wonder..."); - break; - }; - - /* Eat any bitrot in the chip */ - sbus_readb(esp->eregs + ESP_INTRPT); - udelay(100); -} - -/* This places the ESP into a known state at boot time. */ -static void __init esp_bootup_reset(struct esp *esp) -{ - u8 tmp; - - /* Reset the DMA */ - esp_reset_dma(esp); - - /* Reset the ESP */ - esp_reset_esp(esp); - - /* Reset the SCSI bus, but tell ESP not to generate an irq */ - tmp = sbus_readb(esp->eregs + ESP_CFG1); - tmp |= ESP_CONFIG1_SRRDISAB; - sbus_writeb(tmp, esp->eregs + ESP_CFG1); - - esp_cmd(esp, ESP_CMD_RS); - udelay(400); - - sbus_writeb(esp->config1, esp->eregs + ESP_CFG1); - - /* Eat any bitrot in the chip and we are done... */ - sbus_readb(esp->eregs + ESP_INTRPT); -} - -static int __init esp_find_dvma(struct esp *esp, struct sbus_dev *dma_sdev) -{ - struct sbus_dev *sdev = esp->sdev; - struct sbus_dma *dma; - - if (dma_sdev != NULL) { - for_each_dvma(dma) { - if (dma->sdev == dma_sdev) - break; - } - } else { - for_each_dvma(dma) { - /* If allocated already, can't use it. */ - if (dma->allocated) - continue; - - if (dma->sdev == NULL) - break; - - /* If bus + slot are the same and it has the - * correct OBP name, it's ours. - */ - if (sdev->bus == dma->sdev->bus && - sdev->slot == dma->sdev->slot && - (!strcmp(dma->sdev->prom_name, "dma") || - !strcmp(dma->sdev->prom_name, "espdma"))) - break; - } - } - - /* If we don't know how to handle the dvma, - * do not use this device. - */ - if (dma == NULL) { - printk("Cannot find dvma for ESP%d's SCSI\n", esp->esp_id); - return -1; - } - if (dma->allocated) { - printk("esp%d: can't use my espdma\n", esp->esp_id); - return -1; - } - dma->allocated = 1; - esp->dma = dma; - esp->dregs = dma->regs; - - return 0; -} - -static int __init esp_map_regs(struct esp *esp, int hme) -{ - struct sbus_dev *sdev = esp->sdev; - struct resource *res; - - /* On HME, two reg sets exist, first is DVMA, - * second is ESP registers. 
- */ - if (hme) - res = &sdev->resource[1]; - else - res = &sdev->resource[0]; - - esp->eregs = sbus_ioremap(res, 0, ESP_REG_SIZE, "ESP Registers"); - - if (esp->eregs == 0) - return -1; - return 0; -} - -static int __init esp_map_cmdarea(struct esp *esp) -{ - struct sbus_dev *sdev = esp->sdev; - - esp->esp_command = sbus_alloc_consistent(sdev, 16, - &esp->esp_command_dvma); - if (esp->esp_command == NULL || - esp->esp_command_dvma == 0) - return -1; - return 0; -} - -static int __init esp_register_irq(struct esp *esp) -{ - esp->ehost->irq = esp->irq = esp->sdev->irqs[0]; - - /* We used to try various overly-clever things to - * reduce the interrupt processing overhead on - * sun4c/sun4m when multiple ESP's shared the - * same IRQ. It was too complex and messy to - * sanely maintain. - */ - if (request_irq(esp->ehost->irq, esp_intr, - IRQF_SHARED, "ESP SCSI", esp)) { - printk("esp%d: Cannot acquire irq line\n", - esp->esp_id); - return -1; - } - - printk("esp%d: IRQ %d ", esp->esp_id, - esp->ehost->irq); - - return 0; -} - -static void __init esp_get_scsi_id(struct esp *esp) -{ - struct sbus_dev *sdev = esp->sdev; - struct device_node *dp = sdev->ofdev.node; - - esp->scsi_id = of_getintprop_default(dp, - "initiator-id", - -1); - if (esp->scsi_id == -1) - esp->scsi_id = of_getintprop_default(dp, - "scsi-initiator-id", - -1); - if (esp->scsi_id == -1) - esp->scsi_id = (sdev->bus == NULL) ? 7 : - of_getintprop_default(sdev->bus->ofdev.node, - "scsi-initiator-id", - 7); - esp->ehost->this_id = esp->scsi_id; - esp->scsi_id_mask = (1 << esp->scsi_id); - -} - -static void __init esp_get_clock_params(struct esp *esp) -{ - struct sbus_dev *sdev = esp->sdev; - int prom_node = esp->prom_node; - int sbus_prom_node; - unsigned int fmhz; - u8 ccf; - - if (sdev != NULL && sdev->bus != NULL) - sbus_prom_node = sdev->bus->prom_node; - else - sbus_prom_node = 0; - - /* This is getting messy but it has to be done - * correctly or else you get weird behavior all - * over the place. We are trying to basically - * figure out three pieces of information. - * - * a) Clock Conversion Factor - * - * This is a representation of the input - * crystal clock frequency going into the - * ESP on this machine. Any operation whose - * timing is longer than 400ns depends on this - * value being correct. For example, you'll - * get blips for arbitration/selection during - * high load or with multiple targets if this - * is not set correctly. - * - * b) Selection Time-Out - * - * The ESP isn't very bright and will arbitrate - * for the bus and try to select a target - * forever if you let it. This value tells - * the ESP when it has taken too long to - * negotiate and that it should interrupt - * the CPU so we can see what happened. - * The value is computed as follows (from - * NCR/Symbios chip docs). - * - * (Time Out Period) * (Input Clock) - * STO = ---------------------------------- - * (8192) * (Clock Conversion Factor) - * - * You usually want the time out period to be - * around 250ms, I think we'll set it a little - * bit higher to account for fully loaded SCSI - * bus's and slow devices that don't respond so - * quickly to selection attempts. (yeah, I know - * this is out of spec. but there is a lot of - * buggy pieces of firmware out there so bite me) - * - * c) Imperical constants for synchronous offset - * and transfer period register values - * - * This entails the smallest and largest sync - * period we could ever handle on this ESP. 
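A worked instance of the selection time-out formula quoted above may help: with a 20MHz crystal, a CCF of 4, and the nominal 250ms time-out, the register value comes out near 153. The figures in this sketch are illustrative only, not taken from any particular machine:

#include <stdio.h>

int main(void)
{
	double clock_hz = 20000000.0;	/* illustrative 20MHz input clock */
	double ccf = 4.0;		/* clock conversion factor for 20MHz */
	double timeout_s = 0.250;	/* the ~250ms the comment suggests */

	/* STO = (Time Out Period * Input Clock) / (8192 * CCF) */
	double sto = (timeout_s * clock_hz) / (8192.0 * ccf);

	printf("STO register ~= %.1f -> program %d\n",
	       sto, (int)(sto + 0.5));	/* ~152.6 -> 153 */
	return 0;
}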
- */ - - fmhz = prom_getintdefault(prom_node, "clock-frequency", -1); - if (fmhz == -1) - fmhz = (!sbus_prom_node) ? 0 : - prom_getintdefault(sbus_prom_node, "clock-frequency", -1); - - if (fmhz <= (5000000)) - ccf = 0; - else - ccf = (((5000000 - 1) + (fmhz))/(5000000)); - - if (!ccf || ccf > 8) { - /* If we can't find anything reasonable, - * just assume 20MHZ. This is the clock - * frequency of the older sun4c's where I've - * been unable to find the clock-frequency - * PROM property. All other machines provide - * useful values it seems. - */ - ccf = ESP_CCF_F4; - fmhz = (20000000); - } - - if (ccf == (ESP_CCF_F7 + 1)) - esp->cfact = ESP_CCF_F0; - else if (ccf == ESP_CCF_NEVER) - esp->cfact = ESP_CCF_F2; - else - esp->cfact = ccf; - esp->raw_cfact = ccf; - - esp->cfreq = fmhz; - esp->ccycle = ESP_MHZ_TO_CYCLE(fmhz); - esp->ctick = ESP_TICK(ccf, esp->ccycle); - esp->neg_defp = ESP_NEG_DEFP(fmhz, ccf); - esp->sync_defp = SYNC_DEFP_SLOW; - - printk("SCSI ID %d Clk %dMHz CCYC=%d CCF=%d TOut %d ", - esp->scsi_id, (fmhz / 1000000), - (int)esp->ccycle, (int)ccf, (int) esp->neg_defp); -} - -static void __init esp_get_bursts(struct esp *esp, struct sbus_dev *dma) -{ - struct sbus_dev *sdev = esp->sdev; - u8 bursts; - - bursts = prom_getintdefault(esp->prom_node, "burst-sizes", 0xff); - - if (dma) { - u8 tmp = prom_getintdefault(dma->prom_node, - "burst-sizes", 0xff); - if (tmp != 0xff) - bursts &= tmp; - } - - if (sdev->bus) { - u8 tmp = prom_getintdefault(sdev->bus->prom_node, - "burst-sizes", 0xff); - if (tmp != 0xff) - bursts &= tmp; - } - - if (bursts == 0xff || - (bursts & DMA_BURST16) == 0 || - (bursts & DMA_BURST32) == 0) - bursts = (DMA_BURST32 - 1); - - esp->bursts = bursts; -} - -static void __init esp_get_revision(struct esp *esp) -{ - u8 tmp; - - esp->config1 = (ESP_CONFIG1_PENABLE | (esp->scsi_id & 7)); - esp->config2 = (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY); - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - - tmp = sbus_readb(esp->eregs + ESP_CFG2); - tmp &= ~ESP_CONFIG2_MAGIC; - if (tmp != (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY)) { - /* If what we write to cfg2 does not come back, cfg2 - * is not implemented, therefore this must be a plain - * esp100. - */ - esp->erev = esp100; - printk("NCR53C90(esp100)\n"); - } else { - esp->config2 = 0; - esp->prev_cfg3 = esp->config3[0] = 5; - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - sbus_writeb(0, esp->eregs + ESP_CFG3); - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - - tmp = sbus_readb(esp->eregs + ESP_CFG3); - if (tmp != 5) { - /* The cfg2 register is implemented, however - * cfg3 is not, must be esp100a. - */ - esp->erev = esp100a; - printk("NCR53C90A(esp100a)\n"); - } else { - int target; - - for (target = 0; target < 16; target++) - esp->config3[target] = 0; - esp->prev_cfg3 = 0; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - - /* All of cfg{1,2,3} implemented, must be one of - * the fas variants, figure out which one. - */ - if (esp->raw_cfact > ESP_CCF_F5) { - esp->erev = fast; - esp->sync_defp = SYNC_DEFP_FAST; - printk("NCR53C9XF(espfast)\n"); - } else { - esp->erev = esp236; - printk("NCR53C9x(esp236)\n"); - } - esp->config2 = 0; - sbus_writeb(esp->config2, esp->eregs + ESP_CFG2); - } - } -} - -static void __init esp_init_swstate(struct esp *esp) -{ - int i; - - /* Command queues... */ - esp->current_SC = NULL; - esp->disconnected_SC = NULL; - esp->issue_SC = NULL; - - /* Target and current command state... 
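The CCF computation in esp_get_clock_params() above is an integer ceiling division by 5MHz. A self-contained restatement with a few sample frequencies (the frequencies are illustrative):

#include <stdio.h>

/* ccf = ceil(fmhz / 5MHz), written exactly as the driver writes it. */
static unsigned int ccf_for(unsigned int fmhz)
{
	return ((5000000 - 1) + fmhz) / 5000000;
}

int main(void)
{
	printf("10MHz -> CCF %u\n", ccf_for(10000000));	/* 2 */
	printf("20MHz -> CCF %u\n", ccf_for(20000000));	/* 4 */
	printf("25MHz -> CCF %u\n", ccf_for(25000000));	/* 5 */
	printf("40MHz -> CCF %u\n", ccf_for(40000000));	/* 8 */
	return 0;
}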
*/ - esp->targets_present = 0; - esp->resetting_bus = 0; - esp->snip = 0; - - init_waitqueue_head(&esp->reset_queue); - - /* Debugging... */ - for(i = 0; i < 32; i++) - esp->espcmdlog[i] = 0; - esp->espcmdent = 0; - - /* MSG phase state... */ - for(i = 0; i < 16; i++) { - esp->cur_msgout[i] = 0; - esp->cur_msgin[i] = 0; - } - esp->prevmsgout = esp->prevmsgin = 0; - esp->msgout_len = esp->msgin_len = 0; - - /* Clear the one behind caches to hold unmatchable values. */ - esp->prev_soff = esp->prev_stp = esp->prev_cfg3 = 0xff; - esp->prev_hme_dmacsr = 0xffffffff; -} - -static int __init detect_one_esp(struct scsi_host_template *tpnt, - struct device *dev, - struct sbus_dev *esp_dev, - struct sbus_dev *espdma, - struct sbus_bus *sbus, - int hme) -{ - static int instance; - struct Scsi_Host *esp_host = scsi_host_alloc(tpnt, sizeof(struct esp)); - struct esp *esp; - - if (!esp_host) - return -ENOMEM; - - if (hme) - esp_host->max_id = 16; - esp = (struct esp *) esp_host->hostdata; - esp->ehost = esp_host; - esp->sdev = esp_dev; - esp->esp_id = instance; - esp->prom_node = esp_dev->prom_node; - prom_getstring(esp->prom_node, "name", esp->prom_name, - sizeof(esp->prom_name)); - - if (esp_find_dvma(esp, espdma) < 0) - goto fail_unlink; - if (esp_map_regs(esp, hme) < 0) { - printk("ESP registers unmappable"); - goto fail_dvma_release; - } - if (esp_map_cmdarea(esp) < 0) { - printk("ESP DVMA transport area unmappable"); - goto fail_unmap_regs; - } - if (esp_register_irq(esp) < 0) - goto fail_unmap_cmdarea; - - esp_get_scsi_id(esp); - - esp->diff = prom_getbool(esp->prom_node, "differential"); - if (esp->diff) - printk("Differential "); - - esp_get_clock_params(esp); - esp_get_bursts(esp, espdma); - esp_get_revision(esp); - esp_init_swstate(esp); - - esp_bootup_reset(esp); - - if (scsi_add_host(esp_host, dev)) - goto fail_free_irq; - - dev_set_drvdata(&esp_dev->ofdev.dev, esp); - - scsi_scan_host(esp_host); - instance++; - - return 0; - -fail_free_irq: - free_irq(esp->ehost->irq, esp); - -fail_unmap_cmdarea: - sbus_free_consistent(esp->sdev, 16, - (void *) esp->esp_command, - esp->esp_command_dvma); - -fail_unmap_regs: - sbus_iounmap(esp->eregs, ESP_REG_SIZE); - -fail_dvma_release: - esp->dma->allocated = 0; - -fail_unlink: - scsi_host_put(esp_host); - return -1; -} - -/* Detecting ESP chips on the machine. This is the simple and easy - * version. 
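detect_one_esp() above is a textbook example of goto-based unwinding: each acquisition step that fails jumps to a label that releases everything acquired so far, in reverse order. A minimal standalone sketch of the pattern (the helper names are hypothetical stand-ins for esp_find_dvma(), esp_map_regs(), and friends):

#include <stdio.h>

static int grab_first(void)  { return 0; }
static int grab_second(void) { return -1; }	/* force the error path */
static void release_first(void) { puts("released first resource"); }

static int probe(void)
{
	if (grab_first() < 0)
		goto fail;
	if (grab_second() < 0)
		goto fail_release_first;
	return 0;

fail_release_first:
	release_first();	/* unwind in reverse acquisition order */
fail:
	return -1;
}

int main(void)
{
	printf("probe -> %d\n", probe());
	return 0;
}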
- */ -static int __devexit esp_remove_common(struct esp *esp) -{ - unsigned int irq = esp->ehost->irq; - - scsi_remove_host(esp->ehost); - - ESP_INTSOFF(esp->dregs); -#if 0 - esp_reset_dma(esp); - esp_reset_esp(esp); -#endif - - free_irq(irq, esp); - sbus_free_consistent(esp->sdev, 16, - (void *) esp->esp_command, esp->esp_command_dvma); - sbus_iounmap(esp->eregs, ESP_REG_SIZE); - esp->dma->allocated = 0; - - scsi_host_put(esp->ehost); - - return 0; -} - - -#ifdef CONFIG_SUN4 - -#include <asm/sun4paddr.h> - -static struct sbus_dev sun4_esp_dev; - -static int __init esp_sun4_probe(struct scsi_host_template *tpnt) -{ - if (sun4_esp_physaddr) { - memset(&sun4_esp_dev, 0, sizeof(sun4_esp_dev)); - sun4_esp_dev.reg_addrs[0].phys_addr = sun4_esp_physaddr; - sun4_esp_dev.irqs[0] = 4; - sun4_esp_dev.resource[0].start = sun4_esp_physaddr; - sun4_esp_dev.resource[0].end = - sun4_esp_physaddr + ESP_REG_SIZE - 1; - sun4_esp_dev.resource[0].flags = IORESOURCE_IO; - - return detect_one_esp(tpnt, NULL, - &sun4_esp_dev, NULL, NULL, 0); - } - return 0; -} - -static int __devexit esp_sun4_remove(void) -{ - struct of_device *dev = &sun4_esp_dev.ofdev; - struct esp *esp = dev_get_drvdata(&dev->dev); - - return esp_remove_common(esp); -} - -#else /* !CONFIG_SUN4 */ - -static int __devinit esp_sbus_probe(struct of_device *dev, const struct of_device_id *match) -{ - struct sbus_dev *sdev = to_sbus_device(&dev->dev); - struct device_node *dp = dev->node; - struct sbus_dev *dma_sdev = NULL; - int hme = 0; - - if (dp->parent && - (!strcmp(dp->parent->name, "espdma") || - !strcmp(dp->parent->name, "dma"))) - dma_sdev = sdev->parent; - else if (!strcmp(dp->name, "SUNW,fas")) { - dma_sdev = sdev; - hme = 1; - } - - return detect_one_esp(match->data, &dev->dev, - sdev, dma_sdev, sdev->bus, hme); -} - -static int __devexit esp_sbus_remove(struct of_device *dev) -{ - struct esp *esp = dev_get_drvdata(&dev->dev); - - return esp_remove_common(esp); -} - -#endif /* !CONFIG_SUN4 */ - -/* The info function will return whatever useful - * information the developer sees fit. If not provided, then - * the name field will be used instead. - */ -static const char *esp_info(struct Scsi_Host *host) -{ - struct esp *esp; - - esp = (struct esp *) host->hostdata; - switch (esp->erev) { - case esp100: - return "Sparc ESP100 (NCR53C90)"; - case esp100a: - return "Sparc ESP100A (NCR53C90A)"; - case esp236: - return "Sparc ESP236"; - case fas236: - return "Sparc ESP236-FAST"; - case fashme: - return "Sparc ESP366-HME"; - case fas100a: - return "Sparc ESP100A-FAST"; - default: - return "Bogon ESP revision"; - }; -} - -/* From Wolfgang Stanglmeier's NCR scsi driver. */ -struct info_str -{ - char *buffer; - int length; - int offset; - int pos; -}; - -static void copy_mem_info(struct info_str *info, char *data, int len) -{ - if (info->pos + len > info->length) - len = info->length - info->pos; - - if (info->pos + len < info->offset) { - info->pos += len; - return; - } - if (info->pos < info->offset) { - data += (info->offset - info->pos); - len -= (info->offset - info->pos); - } - - if (len > 0) { - memcpy(info->buffer + info->pos, data, len); - info->pos += len; - } -} - -static int copy_info(struct info_str *info, char *fmt, ...) 
-{ - va_list args; - char buf[81]; - int len; - - va_start(args, fmt); - len = vsprintf(buf, fmt, args); - va_end(args); - - copy_mem_info(info, buf, len); - return len; -} - -static int esp_host_info(struct esp *esp, char *ptr, off_t offset, int len) -{ - struct scsi_device *sdev; - struct info_str info; - int i; - - info.buffer = ptr; - info.length = len; - info.offset = offset; - info.pos = 0; - - copy_info(&info, "Sparc ESP Host Adapter:\n"); - copy_info(&info, "\tPROM node\t\t%08x\n", (unsigned int) esp->prom_node); - copy_info(&info, "\tPROM name\t\t%s\n", esp->prom_name); - copy_info(&info, "\tESP Model\t\t"); - switch (esp->erev) { - case esp100: - copy_info(&info, "ESP100\n"); - break; - case esp100a: - copy_info(&info, "ESP100A\n"); - break; - case esp236: - copy_info(&info, "ESP236\n"); - break; - case fas236: - copy_info(&info, "FAS236\n"); - break; - case fas100a: - copy_info(&info, "FAS100A\n"); - break; - case fast: - copy_info(&info, "FAST\n"); - break; - case fashme: - copy_info(&info, "Happy Meal FAS\n"); - break; - case espunknown: - default: - copy_info(&info, "Unknown!\n"); - break; - }; - copy_info(&info, "\tDMA Revision\t\t"); - switch (esp->dma->revision) { - case dvmarev0: - copy_info(&info, "Rev 0\n"); - break; - case dvmaesc1: - copy_info(&info, "ESC Rev 1\n"); - break; - case dvmarev1: - copy_info(&info, "Rev 1\n"); - break; - case dvmarev2: - copy_info(&info, "Rev 2\n"); - break; - case dvmarev3: - copy_info(&info, "Rev 3\n"); - break; - case dvmarevplus: - copy_info(&info, "Rev 1+\n"); - break; - case dvmahme: - copy_info(&info, "Rev HME/FAS\n"); - break; - default: - copy_info(&info, "Unknown!\n"); - break; - }; - copy_info(&info, "\tLive Targets\t\t[ "); - for (i = 0; i < 15; i++) { - if (esp->targets_present & (1 << i)) - copy_info(&info, "%d ", i); - } - copy_info(&info, "]\n\n"); - - /* Now describe the state of each existing target. */ - copy_info(&info, "Target #\tconfig3\t\tSync Capabilities\tDisconnect\tWide\n"); - - shost_for_each_device(sdev, esp->ehost) { - struct esp_device *esp_dev = sdev->hostdata; - uint id = sdev->id; - - if (!(esp->targets_present & (1 << id))) - continue; - - copy_info(&info, "%d\t\t", id); - copy_info(&info, "%08lx\t", esp->config3[id]); - copy_info(&info, "[%02lx,%02lx]\t\t\t", - esp_dev->sync_max_offset, - esp_dev->sync_min_period); - copy_info(&info, "%s\t\t", - esp_dev->disconnect ? "yes" : "no"); - copy_info(&info, "%s\n", - (esp->config3[id] & ESP_CONFIG3_EWIDE) ? "yes" : "no"); - } - return info.pos > info.offset? info.pos - info.offset : 0; -} - -/* ESP proc filesystem code. 
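copy_mem_info() above implements the classic proc_info windowing: the whole report is generated on every read, but only the bytes that fall inside [offset, offset + length) are actually stored, so a large report can be read out page by page. A simplified, self-contained version of the idea (not the driver's exact arithmetic):

#include <stdio.h>

struct window {
	char *buf;
	int offset, length;
	int produced;	/* bytes of the full report seen so far */
	int stored;	/* bytes actually copied into buf */
};

static void emit(struct window *w, const char *data, int len)
{
	int i;

	for (i = 0; i < len; i++, w->produced++) {
		int rel = w->produced - w->offset;

		if (rel >= 0 && rel < w->length)
			w->buf[w->stored++] = data[i];
	}
}

int main(void)
{
	char page[8];
	struct window w = { page, 4, 8, 0, 0 };

	emit(&w, "Sparc ESP Host Adapter\n", 23);
	printf("%.*s\n", w.stored, page);	/* prints "c ESP Ho" */
	return 0;
}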
*/ -static int esp_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, - int length, int inout) -{ - struct esp *esp = (struct esp *) host->hostdata; - - if (inout) - return -EINVAL; /* not yet */ - - if (start) - *start = buffer; - - return esp_host_info(esp, buffer, offset, length); -} - -static void esp_get_dmabufs(struct esp *esp, struct scsi_cmnd *sp) -{ - if (sp->use_sg == 0) { - sp->SCp.this_residual = sp->request_bufflen; - sp->SCp.buffer = (struct scatterlist *) sp->request_buffer; - sp->SCp.buffers_residual = 0; - if (sp->request_bufflen) { - sp->SCp.have_data_in = sbus_map_single(esp->sdev, sp->SCp.buffer, - sp->SCp.this_residual, - sp->sc_data_direction); - sp->SCp.ptr = (char *) ((unsigned long)sp->SCp.have_data_in); - } else { - sp->SCp.ptr = NULL; - } - } else { - sp->SCp.buffer = (struct scatterlist *) sp->request_buffer; - sp->SCp.buffers_residual = sbus_map_sg(esp->sdev, - sp->SCp.buffer, - sp->use_sg, - sp->sc_data_direction); - sp->SCp.this_residual = sg_dma_len(sp->SCp.buffer); - sp->SCp.ptr = (char *) ((unsigned long)sg_dma_address(sp->SCp.buffer)); - } -} - -static void esp_release_dmabufs(struct esp *esp, struct scsi_cmnd *sp) -{ - if (sp->use_sg) { - sbus_unmap_sg(esp->sdev, sp->request_buffer, sp->use_sg, - sp->sc_data_direction); - } else if (sp->request_bufflen) { - sbus_unmap_single(esp->sdev, - sp->SCp.have_data_in, - sp->request_bufflen, - sp->sc_data_direction); - } -} - -static void esp_restore_pointers(struct esp *esp, struct scsi_cmnd *sp) -{ - struct esp_pointers *ep = &esp->data_pointers[sp->device->id]; - - sp->SCp.ptr = ep->saved_ptr; - sp->SCp.buffer = ep->saved_buffer; - sp->SCp.this_residual = ep->saved_this_residual; - sp->SCp.buffers_residual = ep->saved_buffers_residual; -} - -static void esp_save_pointers(struct esp *esp, struct scsi_cmnd *sp) -{ - struct esp_pointers *ep = &esp->data_pointers[sp->device->id]; - - ep->saved_ptr = sp->SCp.ptr; - ep->saved_buffer = sp->SCp.buffer; - ep->saved_this_residual = sp->SCp.this_residual; - ep->saved_buffers_residual = sp->SCp.buffers_residual; -} - -/* Some rules: - * - * 1) Never ever panic while something is live on the bus. - * If there is to be any chance of syncing the disks this - * rule is to be obeyed. - * - * 2) Any target that causes a foul condition will no longer - * have synchronous transfers done to it, no questions - * asked. - * - * 3) Keep register accesses to a minimum. Think about some - * day when we have Xbus machines this is running on and - * the ESP chip is on the other end of the machine on a - * different board from the cpu where this is running. - */ - -/* Fire off a command. We assume the bus is free and that the only - * case where we could see an interrupt is where we have disconnected - * commands active and they are trying to reselect us. - */ -static inline void esp_check_cmd(struct esp *esp, struct scsi_cmnd *sp) -{ - switch (sp->cmd_len) { - case 6: - case 10: - case 12: - esp->esp_slowcmd = 0; - break; - - default: - esp->esp_slowcmd = 1; - esp->esp_scmdleft = sp->cmd_len; - esp->esp_scmdp = &sp->cmnd[0]; - break; - }; -} - -static inline void build_sync_nego_msg(struct esp *esp, int period, int offset) -{ - esp->cur_msgout[0] = EXTENDED_MESSAGE; - esp->cur_msgout[1] = 3; - esp->cur_msgout[2] = EXTENDED_SDTR; - esp->cur_msgout[3] = period; - esp->cur_msgout[4] = offset; - esp->msgout_len = 5; -} - -/* SIZE is in bits, currently HME only supports 16 bit wide transfers. 
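build_sync_nego_msg() above, and build_wide_nego_msg() just below, emit standard SCSI extended messages. For reference, this is the 5-byte SDTR frame the sync version constructs; the period and offset values here are illustrative (the period factor is conventionally in 4ns units):

#include <stdio.h>

#define EXTENDED_MESSAGE 0x01	/* standard SCSI message codes */
#define EXTENDED_SDTR    0x01

int main(void)
{
	unsigned char msg[5] = {
		EXTENDED_MESSAGE,	/* extended message follows */
		3,			/* payload length */
		EXTENDED_SDTR,		/* sync data transfer request */
		25,			/* transfer period factor */
		15,			/* max REQ/ACK offset */
	};
	int i;

	for (i = 0; i < 5; i++)
		printf("%02x ", msg[i]);
	printf("\n");			/* 01 03 01 19 0f */
	return 0;
}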
*/ -static inline void build_wide_nego_msg(struct esp *esp, int size) -{ - esp->cur_msgout[0] = EXTENDED_MESSAGE; - esp->cur_msgout[1] = 2; - esp->cur_msgout[2] = EXTENDED_WDTR; - switch (size) { - case 32: - esp->cur_msgout[3] = 2; - break; - case 16: - esp->cur_msgout[3] = 1; - break; - case 8: - default: - esp->cur_msgout[3] = 0; - break; - }; - - esp->msgout_len = 4; -} - -static void esp_exec_cmd(struct esp *esp) -{ - struct scsi_cmnd *SCptr; - struct scsi_device *SDptr; - struct esp_device *esp_dev; - volatile u8 *cmdp = esp->esp_command; - u8 the_esp_command; - int lun, target; - int i; - - /* Hold off if we have disconnected commands and - * an IRQ is showing... - */ - if (esp->disconnected_SC && ESP_IRQ_P(esp->dregs)) - return; - - /* Grab first member of the issue queue. */ - SCptr = esp->current_SC = remove_first_SC(&esp->issue_SC); - - /* Safe to panic here because current_SC is null. */ - if (!SCptr) - panic("esp: esp_exec_cmd and issue queue is NULL"); - - SDptr = SCptr->device; - esp_dev = SDptr->hostdata; - lun = SCptr->device->lun; - target = SCptr->device->id; - - esp->snip = 0; - esp->msgout_len = 0; - - /* Send it out whole, or piece by piece? The ESP - * only knows how to automatically send out 6, 10, - * and 12 byte commands. I used to think that the - * Linux SCSI code would never throw anything other - * than that to us, but then again there is the - * SCSI generic driver which can send us anything. - */ - esp_check_cmd(esp, SCptr); - - /* If arbitration/selection is successful, the ESP will leave - * ATN asserted, causing the target to go into message out - * phase. The ESP will feed the target the identify and then - * the target can only legally go to one of command, - * datain/out, status, or message in phase, or stay in message - * out phase (should we be trying to send a sync negotiation - * message after the identify). It is not allowed to drop - * BSY, but some buggy targets do and we check for this - * condition in the selection complete code. Most of the time - * we'll make the command bytes available to the ESP and it - * will not interrupt us until it finishes command phase, we - * cannot do this for command sizes the ESP does not - * understand and in this case we'll get interrupted right - * when the target goes into command phase. - * - * It is absolutely _illegal_ in the presence of SCSI-2 devices - * to use the ESP select w/o ATN command. When SCSI-2 devices are - * present on the bus we _must_ always go straight to message out - * phase with an identify message for the target. Being that - * selection attempts in SCSI-1 w/o ATN was an option, doing SCSI-2 - * selections should not confuse SCSI-1 we hope. - */ - - if (esp_dev->sync) { - /* this targets sync is known */ -#ifndef __sparc_v9__ -do_sync_known: -#endif - if (esp_dev->disconnect) - *cmdp++ = IDENTIFY(1, lun); - else - *cmdp++ = IDENTIFY(0, lun); - - if (esp->esp_slowcmd) { - the_esp_command = (ESP_CMD_SELAS | ESP_CMD_DMA); - esp_advance_phase(SCptr, in_slct_stop); - } else { - the_esp_command = (ESP_CMD_SELA | ESP_CMD_DMA); - esp_advance_phase(SCptr, in_slct_norm); - } - } else if (!(esp->targets_present & (1<<target)) || !(esp_dev->disconnect)) { - /* After the bootup SCSI code sends both the - * TEST_UNIT_READY and INQUIRY commands we want - * to at least attempt allowing the device to - * disconnect. - */ - ESPMISC(("esp: Selecting device for first time. 
target=%d " - "lun=%d\n", target, SCptr->device->lun)); - if (!SDptr->borken && !esp_dev->disconnect) - esp_dev->disconnect = 1; - - *cmdp++ = IDENTIFY(0, lun); - esp->prevmsgout = NOP; - esp_advance_phase(SCptr, in_slct_norm); - the_esp_command = (ESP_CMD_SELA | ESP_CMD_DMA); - - /* Take no chances... */ - esp_dev->sync_max_offset = 0; - esp_dev->sync_min_period = 0; - } else { - /* Sorry, I have had way too many problems with - * various CDROM devices on ESP. -DaveM - */ - int cdrom_hwbug_wkaround = 0; - -#ifndef __sparc_v9__ - /* Never allow disconnects or synchronous transfers on - * SparcStation1 and SparcStation1+. Allowing those - * to be enabled seems to lockup the machine completely. - */ - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) { - /* But we are nice and allow tapes and removable - * disks (but not CDROMs) to disconnect. - */ - if(SDptr->type == TYPE_TAPE || - (SDptr->type != TYPE_ROM && SDptr->removable)) - esp_dev->disconnect = 1; - else - esp_dev->disconnect = 0; - esp_dev->sync_max_offset = 0; - esp_dev->sync_min_period = 0; - esp_dev->sync = 1; - esp->snip = 0; - goto do_sync_known; - } -#endif /* !(__sparc_v9__) */ - - /* We've talked to this guy before, - * but never negotiated. Let's try, - * need to attempt WIDE first, before - * sync nego, as per SCSI 2 standard. - */ - if (esp->erev == fashme && !esp_dev->wide) { - if (!SDptr->borken && - SDptr->type != TYPE_ROM && - SDptr->removable == 0) { - build_wide_nego_msg(esp, 16); - esp_dev->wide = 1; - esp->wnip = 1; - goto after_nego_msg_built; - } else { - esp_dev->wide = 1; - /* Fall through and try sync. */ - } - } - - if (!SDptr->borken) { - if ((SDptr->type == TYPE_ROM)) { - /* Nice try sucker... */ - ESPMISC(("esp%d: Disabling sync for buggy " - "CDROM.\n", esp->esp_id)); - cdrom_hwbug_wkaround = 1; - build_sync_nego_msg(esp, 0, 0); - } else if (SDptr->removable != 0) { - ESPMISC(("esp%d: Not negotiating sync/wide but " - "allowing disconnect for removable media.\n", - esp->esp_id)); - build_sync_nego_msg(esp, 0, 0); - } else { - build_sync_nego_msg(esp, esp->sync_defp, 15); - } - } else { - build_sync_nego_msg(esp, 0, 0); - } - esp_dev->sync = 1; - esp->snip = 1; - -after_nego_msg_built: - /* A fix for broken SCSI1 targets, when they disconnect - * they lock up the bus and confuse ESP. So disallow - * disconnects for SCSI1 targets for now until we - * find a better fix. - * - * Addendum: This is funny, I figured out what was going - * on. The blotzed SCSI1 target would disconnect, - * one of the other SCSI2 targets or both would be - * disconnected as well. The SCSI1 target would - * stay disconnected long enough that we start - * up a command on one of the SCSI2 targets. As - * the ESP is arbitrating for the bus the SCSI1 - * target begins to arbitrate as well to reselect - * the ESP. The SCSI1 target refuses to drop it's - * ID bit on the data bus even though the ESP is - * at ID 7 and is the obvious winner for any - * arbitration. The ESP is a poor sport and refuses - * to lose arbitration, it will continue indefinitely - * trying to arbitrate for the bus and can only be - * stopped via a chip reset or SCSI bus reset. - * Therefore _no_ disconnects for SCSI1 targets - * thank you very much. 
;-) - */ - if(((SDptr->scsi_level < 3) && - (SDptr->type != TYPE_TAPE) && - SDptr->removable == 0) || - cdrom_hwbug_wkaround || SDptr->borken) { - ESPMISC((KERN_INFO "esp%d: Disabling DISCONNECT for target %d " - "lun %d\n", esp->esp_id, SCptr->device->id, SCptr->device->lun)); - esp_dev->disconnect = 0; - *cmdp++ = IDENTIFY(0, lun); - } else { - *cmdp++ = IDENTIFY(1, lun); - } - - /* ESP fifo is only so big... - * Make this look like a slow command. - */ - esp->esp_slowcmd = 1; - esp->esp_scmdleft = SCptr->cmd_len; - esp->esp_scmdp = &SCptr->cmnd[0]; - - the_esp_command = (ESP_CMD_SELAS | ESP_CMD_DMA); - esp_advance_phase(SCptr, in_slct_msg); - } - - if (!esp->esp_slowcmd) - for (i = 0; i < SCptr->cmd_len; i++) - *cmdp++ = SCptr->cmnd[i]; - - /* HME sucks... */ - if (esp->erev == fashme) - sbus_writeb((target & 0xf) | (ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT), - esp->eregs + ESP_BUSID); - else - sbus_writeb(target & 7, esp->eregs + ESP_BUSID); - if (esp->prev_soff != esp_dev->sync_max_offset || - esp->prev_stp != esp_dev->sync_min_period || - (esp->erev > esp100a && - esp->prev_cfg3 != esp->config3[target])) { - esp->prev_soff = esp_dev->sync_max_offset; - esp->prev_stp = esp_dev->sync_min_period; - sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF); - sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP); - if (esp->erev > esp100a) { - esp->prev_cfg3 = esp->config3[target]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - } - } - i = (cmdp - esp->esp_command); - - if (esp->erev == fashme) { - esp_cmd(esp, ESP_CMD_FLUSH); /* Grrr! */ - - /* Set up the DMA and HME counters */ - sbus_writeb(i, esp->eregs + ESP_TCLOW); - sbus_writeb(0, esp->eregs + ESP_TCMED); - sbus_writeb(0, esp->eregs + FAS_RLO); - sbus_writeb(0, esp->eregs + FAS_RHI); - esp_cmd(esp, the_esp_command); - - /* Talk about touchy hardware... */ - esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr | - (DMA_SCSI_DISAB | DMA_ENABLE)) & - ~(DMA_ST_WRITE)); - sbus_writel(16, esp->dregs + DMA_COUNT); - sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR); - sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR); - } else { - u32 tmp; - - /* Set up the DMA and ESP counters */ - sbus_writeb(i, esp->eregs + ESP_TCLOW); - sbus_writeb(0, esp->eregs + ESP_TCMED); - tmp = sbus_readl(esp->dregs + DMA_CSR); - tmp &= ~DMA_ST_WRITE; - tmp |= DMA_ENABLE; - sbus_writel(tmp, esp->dregs + DMA_CSR); - if (esp->dma->revision == dvmaesc1) { - if (i) /* Workaround ESC gate array SBUS rerun bug. */ - sbus_writel(PAGE_SIZE, esp->dregs + DMA_COUNT); - } - sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR); - - /* Tell ESP to "go". */ - esp_cmd(esp, the_esp_command); - } -} - -/* Queue a SCSI command delivered from the mid-level Linux SCSI code. */ -static int esp_queue(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) -{ - struct esp *esp; - - /* Set up func ptr and initial driver cmd-phase. */ - SCpnt->scsi_done = done; - SCpnt->SCp.phase = not_issued; - - /* We use the scratch area. */ - ESPQUEUE(("esp_queue: target=%d lun=%d ", SCpnt->device->id, SCpnt->device->lun)); - ESPDISC(("N<%02x,%02x>", SCpnt->device->id, SCpnt->device->lun)); - - esp = (struct esp *) SCpnt->device->host->hostdata; - esp_get_dmabufs(esp, SCpnt); - esp_save_pointers(esp, SCpnt); /* FIXME for tag queueing */ - - SCpnt->SCp.Status = CHECK_CONDITION; - SCpnt->SCp.Message = 0xff; - SCpnt->SCp.sent_command = 0; - - /* Place into our queue. 
*/ - if (SCpnt->cmnd[0] == REQUEST_SENSE) { - ESPQUEUE(("RQSENSE\n")); - prepend_SC(&esp->issue_SC, SCpnt); - } else { - ESPQUEUE(("\n")); - append_SC(&esp->issue_SC, SCpnt); - } - - /* Run it now if we can. */ - if (!esp->current_SC && !esp->resetting_bus) - esp_exec_cmd(esp); - - return 0; -} - -/* Dump driver state. */ -static void esp_dump_cmd(struct scsi_cmnd *SCptr) -{ - ESPLOG(("[tgt<%02x> lun<%02x> " - "pphase<%s> cphase<%s>]", - SCptr->device->id, SCptr->device->lun, - phase_string(SCptr->SCp.sent_command), - phase_string(SCptr->SCp.phase))); -} - -static void esp_dump_state(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; -#ifdef DEBUG_ESP_CMDS - int i; -#endif - - ESPLOG(("esp%d: dumping state\n", esp->esp_id)); - ESPLOG(("esp%d: dma -- cond_reg<%08x> addr<%08x>\n", - esp->esp_id, - sbus_readl(esp->dregs + DMA_CSR), - sbus_readl(esp->dregs + DMA_ADDR))); - ESPLOG(("esp%d: SW [sreg<%02x> sstep<%02x> ireg<%02x>]\n", - esp->esp_id, esp->sreg, esp->seqreg, esp->ireg)); - ESPLOG(("esp%d: HW reread [sreg<%02x> sstep<%02x> ireg<%02x>]\n", - esp->esp_id, - sbus_readb(esp->eregs + ESP_STATUS), - sbus_readb(esp->eregs + ESP_SSTEP), - sbus_readb(esp->eregs + ESP_INTRPT))); -#ifdef DEBUG_ESP_CMDS - printk("esp%d: last ESP cmds [", esp->esp_id); - i = (esp->espcmdent - 1) & 31; - printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">"); - i = (i - 1) & 31; - printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">"); - i = (i - 1) & 31; - printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">"); - i = (i - 1) & 31; - printk("<"); esp_print_cmd(esp->espcmdlog[i]); printk(">"); - printk("]\n"); -#endif /* (DEBUG_ESP_CMDS) */ - - if (SCptr) { - ESPLOG(("esp%d: current command ", esp->esp_id)); - esp_dump_cmd(SCptr); - } - ESPLOG(("\n")); - SCptr = esp->disconnected_SC; - ESPLOG(("esp%d: disconnected ", esp->esp_id)); - while (SCptr) { - esp_dump_cmd(SCptr); - SCptr = (struct scsi_cmnd *) SCptr->host_scribble; - } - ESPLOG(("\n")); -} - -/* Abort a command. The host_lock is acquired by caller. */ -static int esp_abort(struct scsi_cmnd *SCptr) -{ - struct esp *esp = (struct esp *) SCptr->device->host->hostdata; - int don; - - ESPLOG(("esp%d: Aborting command\n", esp->esp_id)); - esp_dump_state(esp); - - /* Wheee, if this is the current command on the bus, the - * best we can do is assert ATN and wait for msgout phase. - * This should even fix a hung SCSI bus when we lose state - * in the driver and timeout because the eventual phase change - * will cause the ESP to (eventually) give an interrupt. - */ - if (esp->current_SC == SCptr) { - esp->cur_msgout[0] = ABORT; - esp->msgout_len = 1; - esp->msgout_ctr = 0; - esp_cmd(esp, ESP_CMD_SATN); - return SUCCESS; - } - - /* If it is still in the issue queue then we can safely - * call the completion routine and report abort success. 
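The issue-queue walk that follows uses an explicit prev/this pair to unlink the aborted command. The same traversal can be written with a pointer-to-pointer, which folds the head and middle cases together; a standalone sketch under illustrative names:

#include <stdio.h>
#include <stddef.h>

struct cmd { int tag; struct cmd *next; };

static int unlink_cmd(struct cmd **q, struct cmd *victim)
{
	struct cmd **pp;

	/* pp always points at the link that would reference victim,
	 * whether that link is the queue head or a ->next field. */
	for (pp = q; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			victim->next = NULL;
			return 0;
		}
	}
	return -1;		/* not queued here */
}

int main(void)
{
	struct cmd a = { 1, NULL }, b = { 2, NULL };
	struct cmd *q = &a;

	a.next = &b;
	printf("unlink b: %d, head now %d\n",
	       unlink_cmd(&q, &b), q ? q->tag : -1);
	return 0;
}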
- */ - don = (sbus_readl(esp->dregs + DMA_CSR) & DMA_INT_ENAB); - if (don) { - ESP_INTSOFF(esp->dregs); - } - if (esp->issue_SC) { - struct scsi_cmnd **prev, *this; - for (prev = (&esp->issue_SC), this = esp->issue_SC; - this != NULL; - prev = (struct scsi_cmnd **) &(this->host_scribble), - this = (struct scsi_cmnd *) this->host_scribble) { - - if (this == SCptr) { - *prev = (struct scsi_cmnd *) this->host_scribble; - this->host_scribble = NULL; - - esp_release_dmabufs(esp, this); - this->result = DID_ABORT << 16; - this->scsi_done(this); - - if (don) - ESP_INTSON(esp->dregs); - - return SUCCESS; - } - } - } - - /* Yuck, the command to abort is disconnected, it is not - * worth trying to abort it now if something else is live - * on the bus at this time. So, we let the SCSI code wait - * a little bit and try again later. - */ - if (esp->current_SC) { - if (don) - ESP_INTSON(esp->dregs); - return FAILED; - } - - /* It's disconnected, we have to reconnect to re-establish - * the nexus and tell the device to abort. However, we really - * cannot 'reconnect' per se. Don't try to be fancy, just - * indicate failure, which causes our caller to reset the whole - * bus. - */ - - if (don) - ESP_INTSON(esp->dregs); - - return FAILED; -} - -/* We've sent ESP_CMD_RS to the ESP, the interrupt had just - * arrived indicating the end of the SCSI bus reset. Our job - * is to clean out the command queues and begin re-execution - * of SCSI commands once more. - */ -static int esp_finish_reset(struct esp *esp) -{ - struct scsi_cmnd *sp = esp->current_SC; - - /* Clean up currently executing command, if any. */ - if (sp != NULL) { - esp->current_SC = NULL; - - esp_release_dmabufs(esp, sp); - sp->result = (DID_RESET << 16); - - sp->scsi_done(sp); - } - - /* Clean up disconnected queue, they have been invalidated - * by the bus reset. - */ - if (esp->disconnected_SC) { - while ((sp = remove_first_SC(&esp->disconnected_SC)) != NULL) { - esp_release_dmabufs(esp, sp); - sp->result = (DID_RESET << 16); - - sp->scsi_done(sp); - } - } - - /* SCSI bus reset is complete. */ - esp->resetting_bus = 0; - wake_up(&esp->reset_queue); - - /* Ok, now it is safe to get commands going once more. */ - if (esp->issue_SC) - esp_exec_cmd(esp); - - return do_intr_end; -} - -static int esp_do_resetbus(struct esp *esp) -{ - ESPLOG(("esp%d: Resetting scsi bus\n", esp->esp_id)); - esp->resetting_bus = 1; - esp_cmd(esp, ESP_CMD_RS); - - return do_intr_end; -} - -/* Reset ESP chip, reset hanging bus, then kill active and - * disconnected commands for targets without soft reset. - * - * The host_lock is acquired by caller. - */ -static int esp_reset(struct scsi_cmnd *SCptr) -{ - struct esp *esp = (struct esp *) SCptr->device->host->hostdata; - - spin_lock_irq(esp->ehost->host_lock); - (void) esp_do_resetbus(esp); - spin_unlock_irq(esp->ehost->host_lock); - - wait_event(esp->reset_queue, (esp->resetting_bus == 0)); - - return SUCCESS; -} - -/* Internal ESP done function. */ -static void esp_done(struct esp *esp, int error) -{ - struct scsi_cmnd *done_SC = esp->current_SC; - - esp->current_SC = NULL; - - esp_release_dmabufs(esp, done_SC); - done_SC->result = error; - - done_SC->scsi_done(done_SC); - - /* Bus is free, issue any commands in the queue. */ - if (esp->issue_SC && !esp->current_SC) - esp_exec_cmd(esp); - -} - -/* Wheee, ESP interrupt engine. */ - -/* Forward declarations. 
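esp_reset() above shows the standard sleep-until-flag-clears handshake: set resetting_bus, kick off the bus reset, then wait_event() until the interrupt path (esp_finish_reset()) clears the flag and calls wake_up(). A userspace analogue with a condition variable, for illustration only:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done = PTHREAD_COND_INITIALIZER;
static int resetting;

static void *finish_reset(void *arg)
{
	pthread_mutex_lock(&lock);
	resetting = 0;			/* esp->resetting_bus = 0 */
	pthread_cond_broadcast(&done);	/* wake_up(&esp->reset_queue) */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&lock);
	resetting = 1;			/* esp->resetting_bus = 1 */
	pthread_mutex_unlock(&lock);

	pthread_create(&t, NULL, finish_reset, NULL);

	pthread_mutex_lock(&lock);	/* wait_event(..., !resetting) */
	while (resetting)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	puts("bus reset complete");
	return 0;
}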
*/ -static int esp_do_phase_determine(struct esp *esp); -static int esp_do_data_finale(struct esp *esp); -static int esp_select_complete(struct esp *esp); -static int esp_do_status(struct esp *esp); -static int esp_do_msgin(struct esp *esp); -static int esp_do_msgindone(struct esp *esp); -static int esp_do_msgout(struct esp *esp); -static int esp_do_cmdbegin(struct esp *esp); - -#define sreg_datainp(__sreg) (((__sreg) & ESP_STAT_PMASK) == ESP_DIP) -#define sreg_dataoutp(__sreg) (((__sreg) & ESP_STAT_PMASK) == ESP_DOP) - -/* Read any bytes found in the FAS366 fifo, storing them into - * the ESP driver software state structure. - */ -static void hme_fifo_read(struct esp *esp) -{ - u8 count = 0; - u8 status = esp->sreg; - - /* Cannot safely frob the fifo for these following cases, but - * we must always read the fifo when the reselect interrupt - * is pending. - */ - if (((esp->ireg & ESP_INTR_RSEL) == 0) && - (sreg_datainp(status) || - sreg_dataoutp(status) || - (esp->current_SC && - esp->current_SC->SCp.phase == in_data_done))) { - ESPHME(("<wkaround_skipped>")); - } else { - unsigned long fcnt = sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES; - - /* The HME stores bytes in multiples of 2 in the fifo. */ - ESPHME(("hme_fifo[fcnt=%d", (int)fcnt)); - while (fcnt) { - esp->hme_fifo_workaround_buffer[count++] = - sbus_readb(esp->eregs + ESP_FDATA); - esp->hme_fifo_workaround_buffer[count++] = - sbus_readb(esp->eregs + ESP_FDATA); - ESPHME(("<%02x,%02x>", esp->hme_fifo_workaround_buffer[count-2], esp->hme_fifo_workaround_buffer[count-1])); - fcnt--; - } - if (sbus_readb(esp->eregs + ESP_STATUS2) & ESP_STAT2_F1BYTE) { - ESPHME(("<poke_byte>")); - sbus_writeb(0, esp->eregs + ESP_FDATA); - esp->hme_fifo_workaround_buffer[count++] = - sbus_readb(esp->eregs + ESP_FDATA); - ESPHME(("<%02x,0x00>", esp->hme_fifo_workaround_buffer[count-1])); - ESPHME(("CMD_FLUSH")); - esp_cmd(esp, ESP_CMD_FLUSH); - } else { - ESPHME(("no_xtra_byte")); - } - } - ESPHME(("wkarnd_cnt=%d]", (int)count)); - esp->hme_fifo_workaround_count = count; -} - -static inline void hme_fifo_push(struct esp *esp, u8 *bytes, u8 count) -{ - esp_cmd(esp, ESP_CMD_FLUSH); - while (count) { - u8 tmp = *bytes++; - sbus_writeb(tmp, esp->eregs + ESP_FDATA); - sbus_writeb(0, esp->eregs + ESP_FDATA); - count--; - } -} - -/* We try to avoid some interrupts by jumping ahead and see if the ESP - * has gotten far enough yet. Hence the following. - */ -static inline int skipahead1(struct esp *esp, struct scsi_cmnd *scp, - int prev_phase, int new_phase) -{ - if (scp->SCp.sent_command != prev_phase) - return 0; - if (ESP_IRQ_P(esp->dregs)) { - /* Yes, we are able to save an interrupt. */ - if (esp->erev == fashme) - esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2); - esp->sreg = (sbus_readb(esp->eregs + ESP_STATUS) & ~(ESP_STAT_INTR)); - esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT); - if (esp->erev == fashme) { - /* This chip is really losing. */ - ESPHME(("HME[")); - /* Must latch fifo before reading the interrupt - * register else garbage ends up in the FIFO - * which confuses the driver utterly. - * Happy Meal indeed.... - */ - ESPHME(("fifo_workaround]")); - if (!(esp->sreg2 & ESP_STAT2_FEMPTY) || - (esp->sreg2 & ESP_STAT2_F1BYTE)) - hme_fifo_read(esp); - } - if (!(esp->ireg & ESP_INTR_SR)) - return 0; - else - return do_reset_complete; - } - /* Ho hum, target is taking forever... */ - scp->SCp.sent_command = new_phase; /* so we don't recurse... 
*/ - return do_intr_end; -} - -static inline int skipahead2(struct esp *esp, struct scsi_cmnd *scp, - int prev_phase1, int prev_phase2, int new_phase) -{ - if (scp->SCp.sent_command != prev_phase1 && - scp->SCp.sent_command != prev_phase2) - return 0; - if (ESP_IRQ_P(esp->dregs)) { - /* Yes, we are able to save an interrupt. */ - if (esp->erev == fashme) - esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2); - esp->sreg = (sbus_readb(esp->eregs + ESP_STATUS) & ~(ESP_STAT_INTR)); - esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT); - if (esp->erev == fashme) { - /* This chip is really losing. */ - ESPHME(("HME[")); - - /* Must latch fifo before reading the interrupt - * register else garbage ends up in the FIFO - * which confuses the driver utterly. - * Happy Meal indeed.... - */ - ESPHME(("fifo_workaround]")); - if (!(esp->sreg2 & ESP_STAT2_FEMPTY) || - (esp->sreg2 & ESP_STAT2_F1BYTE)) - hme_fifo_read(esp); - } - if (!(esp->ireg & ESP_INTR_SR)) - return 0; - else - return do_reset_complete; - } - /* Ho hum, target is taking forever... */ - scp->SCp.sent_command = new_phase; /* so we don't recurse... */ - return do_intr_end; -} - -/* Now some dma helpers. */ -static void dma_setup(struct esp *esp, __u32 addr, int count, int write) -{ - u32 nreg = sbus_readl(esp->dregs + DMA_CSR); - - if (write) - nreg |= DMA_ST_WRITE; - else - nreg &= ~(DMA_ST_WRITE); - nreg |= DMA_ENABLE; - sbus_writel(nreg, esp->dregs + DMA_CSR); - if (esp->dma->revision == dvmaesc1) { - /* This ESC gate array sucks! */ - __u32 src = addr; - __u32 dest = src + count; - - if (dest & (PAGE_SIZE - 1)) - count = PAGE_ALIGN(count); - sbus_writel(count, esp->dregs + DMA_COUNT); - } - sbus_writel(addr, esp->dregs + DMA_ADDR); -} - -static void dma_drain(struct esp *esp) -{ - u32 tmp; - - if (esp->dma->revision == dvmahme) - return; - if ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_FIFO_ISDRAIN) { - switch (esp->dma->revision) { - default: - tmp |= DMA_FIFO_STDRAIN; - sbus_writel(tmp, esp->dregs + DMA_CSR); - - case dvmarev3: - case dvmaesc1: - while (sbus_readl(esp->dregs + DMA_CSR) & DMA_FIFO_ISDRAIN) - udelay(1); - }; - } -} - -static void dma_invalidate(struct esp *esp) -{ - u32 tmp; - - if (esp->dma->revision == dvmahme) { - sbus_writel(DMA_RST_SCSI, esp->dregs + DMA_CSR); - - esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr | - (DMA_PARITY_OFF | DMA_2CLKS | - DMA_SCSI_DISAB | DMA_INT_ENAB)) & - ~(DMA_ST_WRITE | DMA_ENABLE)); - - sbus_writel(0, esp->dregs + DMA_CSR); - sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR); - - /* This is necessary to avoid having the SCSI channel - * engine lock up on us. - */ - sbus_writel(0, esp->dregs + DMA_ADDR); - } else { - while ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_PEND_READ) - udelay(1); - - tmp &= ~(DMA_ENABLE | DMA_ST_WRITE | DMA_BCNT_ENAB); - tmp |= DMA_FIFO_INV; - sbus_writel(tmp, esp->dregs + DMA_CSR); - tmp &= ~DMA_FIFO_INV; - sbus_writel(tmp, esp->dregs + DMA_CSR); - } -} - -static inline void dma_flashclear(struct esp *esp) -{ - dma_drain(esp); - dma_invalidate(esp); -} - -static int dma_can_transfer(struct esp *esp, struct scsi_cmnd *sp) -{ - __u32 base, end, sz; - - if (esp->dma->revision == dvmarev3) { - sz = sp->SCp.this_residual; - if (sz > 0x1000000) - sz = 0x1000000; - } else { - base = ((__u32)((unsigned long)sp->SCp.ptr)); - base &= (0x1000000 - 1); - end = (base + sp->SCp.this_residual); - if (end > 0x1000000) - end = 0x1000000; - sz = (end - base); - } - return sz; -} - -/* Misc. esp helper macros. 
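dma_can_transfer() above caps each burst so the older DVMA gate arrays never wrap their 16 MB window: mask the address down to a window offset, clamp the end to the window size, and transfer the difference. The same arithmetic as a standalone sketch (DVMA_WINDOW and dma_burst_len are illustrative names, not the driver's):

#define DVMA_WINDOW (16UL * 1024 * 1024)	/* 0x1000000, the gate-array limit */

/* Bytes one DMA burst may move from @addr without crossing the
 * 16 MB window, mirroring the non-dvmarev3 branch above. */
unsigned long dma_burst_len(unsigned long addr, unsigned long len)
{
	unsigned long base = addr & (DVMA_WINDOW - 1);
	unsigned long end = base + len;

	if (end > DVMA_WINDOW)
		end = DVMA_WINDOW;
	return end - base;
}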
*/ -#define esp_setcount(__eregs, __cnt, __hme) \ - sbus_writeb(((__cnt)&0xff), (__eregs) + ESP_TCLOW); \ - sbus_writeb((((__cnt)>>8)&0xff), (__eregs) + ESP_TCMED); \ - if (__hme) { \ - sbus_writeb((((__cnt)>>16)&0xff), (__eregs) + FAS_RLO); \ - sbus_writeb(0, (__eregs) + FAS_RHI); \ - } - -#define esp_getcount(__eregs, __hme) \ - ((sbus_readb((__eregs) + ESP_TCLOW)&0xff) | \ - ((sbus_readb((__eregs) + ESP_TCMED)&0xff) << 8) | \ - ((__hme) ? sbus_readb((__eregs) + FAS_RLO) << 16 : 0)) - -#define fcount(__esp) \ - (((__esp)->erev == fashme) ? \ - (__esp)->hme_fifo_workaround_count : \ - sbus_readb(((__esp)->eregs) + ESP_FFLAGS) & ESP_FF_FBYTES) - -#define fnzero(__esp) \ - (((__esp)->erev == fashme) ? 0 : \ - sbus_readb(((__esp)->eregs) + ESP_FFLAGS) & ESP_FF_ONOTZERO) - -/* XXX speculative nops unnecessary when continuing amidst a data phase - * XXX even on esp100!!! another case of flooding the bus with I/O reg - * XXX writes... - */ -#define esp_maybe_nop(__esp) \ - if ((__esp)->erev == esp100) \ - esp_cmd((__esp), ESP_CMD_NULL) - -#define sreg_to_dataphase(__sreg) \ - ((((__sreg) & ESP_STAT_PMASK) == ESP_DOP) ? in_dataout : in_datain) - -/* The ESP100 when in synchronous data phase, can mistake a long final - * REQ pulse from the target as an extra byte, it places whatever is on - * the data lines into the fifo. For now, we will assume when this - * happens that the target is a bit quirky and we don't want to - * be talking synchronously to it anyways. Regardless, we need to - * tell the ESP to eat the extraneous byte so that we can proceed - * to the next phase. - */ -static int esp100_sync_hwbug(struct esp *esp, struct scsi_cmnd *sp, int fifocnt) -{ - /* Do not touch this piece of code. */ - if ((!(esp->erev == esp100)) || - (!(sreg_datainp((esp->sreg = sbus_readb(esp->eregs + ESP_STATUS))) && - !fifocnt) && - !(sreg_dataoutp(esp->sreg) && !fnzero(esp)))) { - if (sp->SCp.phase == in_dataout) - esp_cmd(esp, ESP_CMD_FLUSH); - return 0; - } else { - /* Async mode for this guy. */ - build_sync_nego_msg(esp, 0, 0); - - /* Ack the bogus byte, but set ATN first. */ - esp_cmd(esp, ESP_CMD_SATN); - esp_cmd(esp, ESP_CMD_MOK); - return 1; - } -} - -/* This closes the window during a selection with a reselect pending, because - * we use DMA for the selection process the FIFO should hold the correct - * contents if we get reselected during this process. So we just need to - * ack the possible illegal cmd interrupt pending on the esp100. - */ -static inline int esp100_reconnect_hwbug(struct esp *esp) -{ - u8 tmp; - - if (esp->erev != esp100) - return 0; - tmp = sbus_readb(esp->eregs + ESP_INTRPT); - if (tmp & ESP_INTR_SR) - return 1; - return 0; -} - -/* This verifies the BUSID bits during a reselection so that we know which - * target is talking to us. - */ -static inline int reconnect_target(struct esp *esp) -{ - int it, me = esp->scsi_id_mask, targ = 0; - - if (2 != fcount(esp)) - return -1; - if (esp->erev == fashme) { - /* HME does not latch it's own BUS ID bits during - * a reselection. Also the target number is given - * as an unsigned char, not as a sole bit number - * like the other ESP's do. - * Happy Meal indeed.... - */ - targ = esp->hme_fifo_workaround_buffer[0]; - } else { - it = sbus_readb(esp->eregs + ESP_FDATA); - if (!(it & me)) - return -1; - it &= ~me; - if (it & (it - 1)) - return -1; - while (!(it & 1)) - targ++, it >>= 1; - } - return targ; -} - -/* This verifies the identify from the target so that we know which lun is - * being reconnected. 
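reconnect_target() above accepts a reselection only if, after clearing our own ID bit, exactly one BUS ID bit remains: `it & (it - 1)` strips the lowest set bit, so any nonzero remainder means the byte was not one-hot. A self-contained illustration (busid_to_target is a hypothetical name):

#include <assert.h>

/* Map a one-hot SCSI BUS ID byte to a target number, or -1 if the
 * byte does not have exactly one bit set (a bogus reselection). */
int busid_to_target(unsigned int it)
{
	int targ = 0;

	if (!it || (it & (it - 1)))	/* zero bits, or more than one */
		return -1;
	while (!(it & 1)) {
		targ++;
		it >>= 1;
	}
	return targ;
}

int main(void)
{
	assert(busid_to_target(0x08) == 3);
	assert(busid_to_target(0x0a) == -1);	/* two bits set */
	return 0;
}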
- */ -static inline int reconnect_lun(struct esp *esp) -{ - int lun; - - if ((esp->sreg & ESP_STAT_PMASK) != ESP_MIP) - return -1; - if (esp->erev == fashme) - lun = esp->hme_fifo_workaround_buffer[1]; - else - lun = sbus_readb(esp->eregs + ESP_FDATA); - - /* Yes, you read this correctly. We report lun of zero - * if we see parity error. ESP reports parity error for - * the lun byte, and this is the only way to hope to recover - * because the target is connected. - */ - if (esp->sreg & ESP_STAT_PERR) - return 0; - - /* Check for illegal bits being set in the lun. */ - if ((lun & 0x40) || !(lun & 0x80)) - return -1; - - return lun & 7; -} - -/* This puts the driver in a state where it can revitalize a command that - * is being continued due to reselection. - */ -static inline void esp_connect(struct esp *esp, struct scsi_cmnd *sp) -{ - struct esp_device *esp_dev = sp->device->hostdata; - - if (esp->prev_soff != esp_dev->sync_max_offset || - esp->prev_stp != esp_dev->sync_min_period || - (esp->erev > esp100a && - esp->prev_cfg3 != esp->config3[sp->device->id])) { - esp->prev_soff = esp_dev->sync_max_offset; - esp->prev_stp = esp_dev->sync_min_period; - sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF); - sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP); - if (esp->erev > esp100a) { - esp->prev_cfg3 = esp->config3[sp->device->id]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - } - } - esp->current_SC = sp; -} - -/* This will place the current working command back into the issue queue - * if we are to receive a reselection amidst a selection attempt. - */ -static inline void esp_reconnect(struct esp *esp, struct scsi_cmnd *sp) -{ - if (!esp->disconnected_SC) - ESPLOG(("esp%d: Weird, being reselected but disconnected " - "command queue is empty.\n", esp->esp_id)); - esp->snip = 0; - esp->current_SC = NULL; - sp->SCp.phase = not_issued; - append_SC(&esp->issue_SC, sp); -} - -/* Begin message in phase. */ -static int esp_do_msgin(struct esp *esp) -{ - /* Must be very careful with the fifo on the HME */ - if ((esp->erev != fashme) || - !(sbus_readb(esp->eregs + ESP_STATUS2) & ESP_STAT2_FEMPTY)) - esp_cmd(esp, ESP_CMD_FLUSH); - esp_maybe_nop(esp); - esp_cmd(esp, ESP_CMD_TI); - esp->msgin_len = 1; - esp->msgin_ctr = 0; - esp_advance_phase(esp->current_SC, in_msgindone); - return do_work_bus; -} - -/* This uses various DMA csr fields and the fifo flags count value to - * determine how many bytes were successfully sent/received by the ESP. - */ -static inline int esp_bytes_sent(struct esp *esp, int fifo_count) -{ - int rval = sbus_readl(esp->dregs + DMA_ADDR) - esp->esp_command_dvma; - - if (esp->dma->revision == dvmarev1) - rval -= (4 - ((sbus_readl(esp->dregs + DMA_CSR) & DMA_READ_AHEAD)>>11)); - return rval - fifo_count; -} - -static inline void advance_sg(struct scsi_cmnd *sp) -{ - ++sp->SCp.buffer; - --sp->SCp.buffers_residual; - sp->SCp.this_residual = sg_dma_len(sp->SCp.buffer); - sp->SCp.ptr = (char *)((unsigned long)sg_dma_address(sp->SCp.buffer)); -} - -/* Please note that the way I've coded these routines is that I _always_ - * check for a disconnect during any and all information transfer - * phases. The SCSI standard states that the target _can_ cause a BUS - * FREE condition by dropping all MSG/CD/IO/BSY signals. Also note - * that during information transfer phases the target controls every - * change in phase, the only thing the initiator can do is "ask" for - * a message out phase by driving ATN true. 
The target can, and sometimes - * will, completely ignore this request so we cannot assume anything when - * we try to force a message out phase to abort/reset a target. Most of - * the time the target will eventually be nice and go to message out, so - * we may have to hold on to our state about what we want to tell the target - * for some period of time. - */ - -/* I think I have things working here correctly. Even partial transfers - * within a buffer or sub-buffer should not upset us at all no matter - * how bad the target and/or ESP fucks things up. - */ -static int esp_do_data(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - int thisphase, hmuch; - - ESPDATA(("esp_do_data: ")); - esp_maybe_nop(esp); - thisphase = sreg_to_dataphase(esp->sreg); - esp_advance_phase(SCptr, thisphase); - ESPDATA(("newphase<%s> ", (thisphase == in_datain) ? "DATAIN" : "DATAOUT")); - hmuch = dma_can_transfer(esp, SCptr); - if (hmuch > (64 * 1024) && (esp->erev != fashme)) - hmuch = (64 * 1024); - ESPDATA(("hmuch<%d> ", hmuch)); - esp->current_transfer_size = hmuch; - - if (esp->erev == fashme) { - u32 tmp = esp->prev_hme_dmacsr; - - /* Always set the ESP count registers first. */ - esp_setcount(esp->eregs, hmuch, 1); - - /* Get the DMA csr computed. */ - tmp |= (DMA_SCSI_DISAB | DMA_ENABLE); - if (thisphase == in_datain) - tmp |= DMA_ST_WRITE; - else - tmp &= ~(DMA_ST_WRITE); - esp->prev_hme_dmacsr = tmp; - - ESPDATA(("DMA|TI --> do_intr_end\n")); - if (thisphase == in_datain) { - sbus_writel(hmuch, esp->dregs + DMA_COUNT); - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - } else { - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - sbus_writel(hmuch, esp->dregs + DMA_COUNT); - } - sbus_writel((__u32)((unsigned long)SCptr->SCp.ptr), esp->dregs+DMA_ADDR); - sbus_writel(esp->prev_hme_dmacsr, esp->dregs + DMA_CSR); - } else { - esp_setcount(esp->eregs, hmuch, 0); - dma_setup(esp, ((__u32)((unsigned long)SCptr->SCp.ptr)), - hmuch, (thisphase == in_datain)); - ESPDATA(("DMA|TI --> do_intr_end\n")); - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - } - return do_intr_end; -} - -/* See how successful the data transfer was. */ -static int esp_do_data_finale(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - struct esp_device *esp_dev = SCptr->device->hostdata; - int bogus_data = 0, bytes_sent = 0, fifocnt, ecount = 0; - - ESPDATA(("esp_do_data_finale: ")); - - if (SCptr->SCp.phase == in_datain) { - if (esp->sreg & ESP_STAT_PERR) { - /* Yuck, parity error. The ESP asserts ATN - * so that we can go to message out phase - * immediately and inform the target that - * something bad happened. - */ - ESPLOG(("esp%d: data bad parity detected.\n", - esp->esp_id)); - esp->cur_msgout[0] = INITIATOR_ERROR; - esp->msgout_len = 1; - } - dma_drain(esp); - } - dma_invalidate(esp); - - /* This could happen for the above parity error case. */ - if (esp->ireg != ESP_INTR_BSERV) { - /* Please go to msgout phase, please please please... */ - ESPLOG(("esp%d: !BSERV after data, probably to msgout\n", - esp->esp_id)); - return esp_do_phase_determine(esp); - } - - /* Check for partial transfers and other horrible events. - * Note, here we read the real fifo flags register even - * on HME broken adapters because we skip the HME fifo - * workaround code in esp_handle() if we are doing data - * phase things. We don't want to fuck directly with - * the fifo like that, especially if doing synchronous - * transfers! 
Also, will need to double the count on - * HME if we are doing wide transfers, as the HME fifo - * will move and count 16-bit quantities during wide data. - * SMCC _and_ Qlogic can both bite me. - */ - fifocnt = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES); - if (esp->erev != fashme) - ecount = esp_getcount(esp->eregs, 0); - bytes_sent = esp->current_transfer_size; - - ESPDATA(("trans_sz(%d), ", bytes_sent)); - if (esp->erev == fashme) { - if (!(esp->sreg & ESP_STAT_TCNT)) { - ecount = esp_getcount(esp->eregs, 1); - bytes_sent -= ecount; - } - - /* Always subtract any cruft remaining in the FIFO. */ - if (esp->prev_cfg3 & ESP_CONFIG3_EWIDE) - fifocnt <<= 1; - if (SCptr->SCp.phase == in_dataout) - bytes_sent -= fifocnt; - - /* I have an IBM disk which exhibits the following - * behavior during writes to it. It disconnects in - * the middle of a partial transfer, the current sglist - * buffer is 1024 bytes, the disk stops data transfer - * at 512 bytes. - * - * However the FAS366 reports that 32 more bytes were - * transferred than really were. This is precisely - * the size of a fully loaded FIFO in wide scsi mode. - * The FIFO state recorded indicates that it is empty. - * - * I have no idea if this is a bug in the FAS366 chip - * or a bug in the firmware on this IBM disk. In any - * event the following seems to be a good workaround. -DaveM - */ - if (bytes_sent != esp->current_transfer_size && - SCptr->SCp.phase == in_dataout) { - int mask = (64 - 1); - - if ((esp->prev_cfg3 & ESP_CONFIG3_EWIDE) == 0) - mask >>= 1; - - if (bytes_sent & mask) - bytes_sent -= (bytes_sent & mask); - } - } else { - if (!(esp->sreg & ESP_STAT_TCNT)) - bytes_sent -= ecount; - if (SCptr->SCp.phase == in_dataout) - bytes_sent -= fifocnt; - } - - ESPDATA(("bytes_sent(%d), ", bytes_sent)); - - /* If we were in synchronous mode, check for peculiarities. */ - if (esp->erev == fashme) { - if (esp_dev->sync_max_offset) { - if (SCptr->SCp.phase == in_dataout) - esp_cmd(esp, ESP_CMD_FLUSH); - } else { - esp_cmd(esp, ESP_CMD_FLUSH); - } - } else { - if (esp_dev->sync_max_offset) - bogus_data = esp100_sync_hwbug(esp, SCptr, fifocnt); - else - esp_cmd(esp, ESP_CMD_FLUSH); - } - - /* Until we are sure of what has happened, we are certainly - * in the dark. - */ - esp_advance_phase(SCptr, in_the_dark); - - if (bytes_sent < 0) { - /* I've seen this happen due to lost state in this - * driver. No idea why it happened, but allowing - * this value to be negative caused things to - * lock up. This allows greater chance of recovery. - * In fact every time I've seen this, it has been - * a driver bug without question. - */ - ESPLOG(("esp%d: yieee, bytes_sent < 0!\n", esp->esp_id)); - ESPLOG(("esp%d: csz=%d fifocount=%d ecount=%d\n", - esp->esp_id, - esp->current_transfer_size, fifocnt, ecount)); - ESPLOG(("esp%d: use_sg=%d ptr=%p this_residual=%d\n", - esp->esp_id, - SCptr->use_sg, SCptr->SCp.ptr, SCptr->SCp.this_residual)); - ESPLOG(("esp%d: Forcing async for target %d\n", esp->esp_id, - SCptr->device->id)); - SCptr->device->borken = 1; - esp_dev->sync = 0; - bytes_sent = 0; - } - - /* Update the state of our transfer. */ - SCptr->SCp.ptr += bytes_sent; - SCptr->SCp.this_residual -= bytes_sent; - if (SCptr->SCp.this_residual < 0) { - /* shit */ - ESPLOG(("esp%d: Data transfer overrun.\n", esp->esp_id)); - SCptr->SCp.this_residual = 0; - } - - /* Maybe continue. 
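The residual accounting above works backwards from the programmed transfer size: subtract the transfer counter if it never reached zero, then subtract FIFO leftovers on data-out, doubling them when the FAS366 FIFO is counting 16-bit wide-SCSI quantities. A simplified, hedged model of that bookkeeping; all parameter names are illustrative, and the inputs correspond to values the driver reads back from the chip:

/* Rough model of esp_do_data_finale()'s residual math. */
int bytes_transferred(int programmed, int tcnt_zero, int ecount,
		      int fifocnt, int writing, int wide)
{
	int sent = programmed;

	if (!tcnt_zero)		/* counter did not expire: subtract residue */
		sent -= ecount;
	if (writing)		/* data-out: FIFO bytes never hit the bus */
		sent -= wide ? (fifocnt << 1) : fifocnt;
	return sent < 0 ? 0 : sent;	/* negative means a driver bug */
}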
*/ - if (!bogus_data) { - ESPDATA(("!bogus_data, ")); - - /* NO MATTER WHAT, we advance the scatterlist, - * if the target should decide to disconnect - * in between scatter chunks (which is common) - * we could die horribly! I used to have the sg - * advance occur only if we are going back into - * (or are staying in) a data phase, you can - * imagine the hell I went through trying to - * figure this out. - */ - if (SCptr->use_sg && !SCptr->SCp.this_residual) - advance_sg(SCptr); - if (sreg_datainp(esp->sreg) || sreg_dataoutp(esp->sreg)) { - ESPDATA(("to more data\n")); - return esp_do_data(esp); - } - ESPDATA(("to new phase\n")); - return esp_do_phase_determine(esp); - } - /* Bogus data, just wait for next interrupt. */ - ESPLOG(("esp%d: bogus_data during end of data phase\n", - esp->esp_id)); - return do_intr_end; -} - -/* We received a non-good status return at the end of - * running a SCSI command. This is used to decide if - * we should clear our synchronous transfer state for - * such a device when that happens. - * - * The idea is that when spinning up a disk or rewinding - * a tape, we don't want to go into a loop re-negotiating - * synchronous capabilities over and over. - */ -static int esp_should_clear_sync(struct scsi_cmnd *sp) -{ - u8 cmd = sp->cmnd[0]; - - /* These cases are for spinning up a disk and - * waiting for that spinup to complete. - */ - if (cmd == START_STOP) - return 0; - - if (cmd == TEST_UNIT_READY) - return 0; - - /* One more special case for SCSI tape drives, - * this is what is used to probe the device for - * completion of a rewind or tape load operation. - */ - if (sp->device->type == TYPE_TAPE) { - if (cmd == MODE_SENSE) - return 0; - } - - return 1; -} - -/* Either a command is completing or a target is dropping off the bus - * to continue the command in the background so we can do other work. - */ -static int esp_do_freebus(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - struct esp_device *esp_dev = SCptr->device->hostdata; - int rval; - - rval = skipahead2(esp, SCptr, in_status, in_msgindone, in_freeing); - if (rval) - return rval; - if (esp->ireg != ESP_INTR_DC) { - ESPLOG(("esp%d: Target will not disconnect\n", esp->esp_id)); - return do_reset_bus; /* target will not drop BSY... */ - } - esp->msgout_len = 0; - esp->prevmsgout = NOP; - if (esp->prevmsgin == COMMAND_COMPLETE) { - /* Normal end of nexus. */ - if (esp->disconnected_SC || (esp->erev == fashme)) - esp_cmd(esp, ESP_CMD_ESEL); - - if (SCptr->SCp.Status != GOOD && - SCptr->SCp.Status != CONDITION_GOOD && - ((1<<SCptr->device->id) & esp->targets_present) && - esp_dev->sync && - esp_dev->sync_max_offset) { - /* SCSI standard says that the synchronous capabilities - * should be renegotiated at this point. Most likely - * we are about to request sense from this target - * in which case we want to avoid using sync - * transfers until we are sure of the current target - * state. - */ - ESPMISC(("esp: Status <%d> for target %d lun %d\n", - SCptr->SCp.Status, SCptr->device->id, SCptr->device->lun)); - - /* But don't do this when spinning up a disk at - * boot time while we poll for completion as it - * fills up the console with messages. Also, tapes - * can report not ready many times right after - * loading up a tape. 
- */ - if (esp_should_clear_sync(SCptr) != 0) - esp_dev->sync = 0; - } - ESPDISC(("F<%02x,%02x>", SCptr->device->id, SCptr->device->lun)); - esp_done(esp, ((SCptr->SCp.Status & 0xff) | - ((SCptr->SCp.Message & 0xff)<<8) | - (DID_OK << 16))); - } else if (esp->prevmsgin == DISCONNECT) { - /* Normal disconnect. */ - esp_cmd(esp, ESP_CMD_ESEL); - ESPDISC(("D<%02x,%02x>", SCptr->device->id, SCptr->device->lun)); - append_SC(&esp->disconnected_SC, SCptr); - esp->current_SC = NULL; - if (esp->issue_SC) - esp_exec_cmd(esp); - } else { - /* Driver bug, we do not expect a disconnect here - * and should not have advanced the state engine - * to in_freeing. - */ - ESPLOG(("esp%d: last msg not disc and not cmd cmplt.\n", - esp->esp_id)); - return do_reset_bus; - } - return do_intr_end; -} - -/* When a reselect occurs, and we cannot find the command to - * reconnect to in our queues, we do this. - */ -static int esp_bad_reconnect(struct esp *esp) -{ - struct scsi_cmnd *sp; - - ESPLOG(("esp%d: Eieeee, reconnecting unknown command!\n", - esp->esp_id)); - ESPLOG(("QUEUE DUMP\n")); - sp = esp->issue_SC; - ESPLOG(("esp%d: issue_SC[", esp->esp_id)); - while (sp) { - ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun)); - sp = (struct scsi_cmnd *) sp->host_scribble; - } - ESPLOG(("]\n")); - sp = esp->current_SC; - ESPLOG(("esp%d: current_SC[", esp->esp_id)); - if (sp) - ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun)); - else - ESPLOG(("<NULL>")); - ESPLOG(("]\n")); - sp = esp->disconnected_SC; - ESPLOG(("esp%d: disconnected_SC[", esp->esp_id)); - while (sp) { - ESPLOG(("<%02x,%02x>", sp->device->id, sp->device->lun)); - sp = (struct scsi_cmnd *) sp->host_scribble; - } - ESPLOG(("]\n")); - return do_reset_bus; -} - -/* Do the needy when a target tries to reconnect to us. */ -static int esp_do_reconnect(struct esp *esp) -{ - int lun, target; - struct scsi_cmnd *SCptr; - - /* Check for all bogus conditions first. */ - target = reconnect_target(esp); - if (target < 0) { - ESPDISC(("bad bus bits\n")); - return do_reset_bus; - } - lun = reconnect_lun(esp); - if (lun < 0) { - ESPDISC(("target=%2x, bad identify msg\n", target)); - return do_reset_bus; - } - - /* Things look ok... */ - ESPDISC(("R<%02x,%02x>", target, lun)); - - /* Must not flush FIFO or DVMA on HME. */ - if (esp->erev != fashme) { - esp_cmd(esp, ESP_CMD_FLUSH); - if (esp100_reconnect_hwbug(esp)) - return do_reset_bus; - esp_cmd(esp, ESP_CMD_NULL); - } - - SCptr = remove_SC(&esp->disconnected_SC, (u8) target, (u8) lun); - if (!SCptr) - return esp_bad_reconnect(esp); - - esp_connect(esp, SCptr); - esp_cmd(esp, ESP_CMD_MOK); - - if (esp->erev == fashme) - sbus_writeb(((SCptr->device->id & 0xf) | - (ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT)), - esp->eregs + ESP_BUSID); - - /* Reconnect implies a restore pointers operation. */ - esp_restore_pointers(esp, SCptr); - - esp->snip = 0; - esp_advance_phase(SCptr, in_the_dark); - return do_intr_end; -} - -/* End of NEXUS (hopefully), pick up status + message byte then leave if - * all goes well. - */ -static int esp_do_status(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - int intr, rval; - - rval = skipahead1(esp, SCptr, in_the_dark, in_status); - if (rval) - return rval; - intr = esp->ireg; - ESPSTAT(("esp_do_status: ")); - if (intr != ESP_INTR_DC) { - int message_out = 0; /* for parity problems */ - - /* Ack the message. */ - ESPSTAT(("ack msg, ")); - esp_cmd(esp, ESP_CMD_MOK); - - if (esp->erev != fashme) { - dma_flashclear(esp); - - /* Wait till the first bits settle. 
*/ - while (esp->esp_command[0] == 0xff) - udelay(1); - } else { - esp->esp_command[0] = esp->hme_fifo_workaround_buffer[0]; - esp->esp_command[1] = esp->hme_fifo_workaround_buffer[1]; - } - - ESPSTAT(("got something, ")); - /* ESP chimes in with one of - * - * 1) function done interrupt: - * both status and message in bytes - * are available - * - * 2) bus service interrupt: - * only status byte was acquired - * - * 3) Anything else: - * can't happen, but we test for it - * anyways - * - * ALSO: If bad parity was detected on either - * the status _or_ the message byte then - * the ESP has asserted ATN on the bus - * and we must therefore wait for the - * next phase change. - */ - if (intr & ESP_INTR_FDONE) { - /* We got it all, hallejulia. */ - ESPSTAT(("got both, ")); - SCptr->SCp.Status = esp->esp_command[0]; - SCptr->SCp.Message = esp->esp_command[1]; - esp->prevmsgin = SCptr->SCp.Message; - esp->cur_msgin[0] = SCptr->SCp.Message; - if (esp->sreg & ESP_STAT_PERR) { - /* There was bad parity for the - * message byte, the status byte - * was ok. - */ - message_out = MSG_PARITY_ERROR; - } - } else if (intr == ESP_INTR_BSERV) { - /* Only got status byte. */ - ESPLOG(("esp%d: got status only, ", esp->esp_id)); - if (!(esp->sreg & ESP_STAT_PERR)) { - SCptr->SCp.Status = esp->esp_command[0]; - SCptr->SCp.Message = 0xff; - } else { - /* The status byte had bad parity. - * we leave the scsi_pointer Status - * field alone as we set it to a default - * of CHECK_CONDITION in esp_queue. - */ - message_out = INITIATOR_ERROR; - } - } else { - /* This shouldn't happen ever. */ - ESPSTAT(("got bolixed\n")); - esp_advance_phase(SCptr, in_the_dark); - return esp_do_phase_determine(esp); - } - - if (!message_out) { - ESPSTAT(("status=%2x msg=%2x, ", SCptr->SCp.Status, - SCptr->SCp.Message)); - if (SCptr->SCp.Message == COMMAND_COMPLETE) { - ESPSTAT(("and was COMMAND_COMPLETE\n")); - esp_advance_phase(SCptr, in_freeing); - return esp_do_freebus(esp); - } else { - ESPLOG(("esp%d: and _not_ COMMAND_COMPLETE\n", - esp->esp_id)); - esp->msgin_len = esp->msgin_ctr = 1; - esp_advance_phase(SCptr, in_msgindone); - return esp_do_msgindone(esp); - } - } else { - /* With luck we'll be able to let the target - * know that bad parity happened, it will know - * which byte caused the problems and send it - * again. For the case where the status byte - * receives bad parity, I do not believe most - * targets recover very well. We'll see. - */ - ESPLOG(("esp%d: bad parity somewhere mout=%2x\n", - esp->esp_id, message_out)); - esp->cur_msgout[0] = message_out; - esp->msgout_len = esp->msgout_ctr = 1; - esp_advance_phase(SCptr, in_the_dark); - return esp_do_phase_determine(esp); - } - } else { - /* If we disconnect now, all hell breaks loose. 
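The `esp_command[0] == 0xff` poll above works because the status path presets the two-byte completion buffer to 0xff sentinels and lets the chip DMA the real status and message bytes over them (the preset itself is in esp_enter_status(), which follows). A hedged sketch of the sentinel pattern; delay_us() is a stand-in for udelay(), and real code also needs the buffer to live in a coherent DMA mapping:

#define SENTINEL 0xff	/* preset value the DMA engine will overwrite */

extern void delay_us(unsigned long us);	/* stand-in for udelay() */

/* Spin until the bus master has overwritten the preset sentinel;
 * the volatile qualifier keeps the load inside the loop. */
void wait_for_dma_byte(volatile unsigned char *buf)
{
	while (buf[0] == SENTINEL)
		delay_us(1);
}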
*/ - ESPLOG(("esp%d: whoops, disconnect\n", esp->esp_id)); - esp_advance_phase(SCptr, in_the_dark); - return esp_do_phase_determine(esp); - } -} - -static int esp_enter_status(struct esp *esp) -{ - u8 thecmd = ESP_CMD_ICCSEQ; - - esp_cmd(esp, ESP_CMD_FLUSH); - if (esp->erev != fashme) { - u32 tmp; - - esp->esp_command[0] = esp->esp_command[1] = 0xff; - sbus_writeb(2, esp->eregs + ESP_TCLOW); - sbus_writeb(0, esp->eregs + ESP_TCMED); - tmp = sbus_readl(esp->dregs + DMA_CSR); - tmp |= (DMA_ST_WRITE | DMA_ENABLE); - sbus_writel(tmp, esp->dregs + DMA_CSR); - if (esp->dma->revision == dvmaesc1) - sbus_writel(0x100, esp->dregs + DMA_COUNT); - sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR); - thecmd |= ESP_CMD_DMA; - } - esp_cmd(esp, thecmd); - esp_advance_phase(esp->current_SC, in_status); - - return esp_do_status(esp); -} - -static int esp_disconnect_amidst_phases(struct esp *esp) -{ - struct scsi_cmnd *sp = esp->current_SC; - struct esp_device *esp_dev = sp->device->hostdata; - - /* This means real problems if we see this - * here. Unless we were actually trying - * to force the device to abort/reset. - */ - ESPLOG(("esp%d Disconnect amidst phases, ", esp->esp_id)); - ESPLOG(("pphase<%s> cphase<%s>, ", - phase_string(sp->SCp.phase), - phase_string(sp->SCp.sent_command))); - - if (esp->disconnected_SC != NULL || (esp->erev == fashme)) - esp_cmd(esp, ESP_CMD_ESEL); - - switch (esp->cur_msgout[0]) { - default: - /* We didn't expect this to happen at all. */ - ESPLOG(("device is bolixed\n")); - esp_advance_phase(sp, in_tgterror); - esp_done(esp, (DID_ERROR << 16)); - break; - - case BUS_DEVICE_RESET: - ESPLOG(("device reset successful\n")); - esp_dev->sync_max_offset = 0; - esp_dev->sync_min_period = 0; - esp_dev->sync = 0; - esp_advance_phase(sp, in_resetdev); - esp_done(esp, (DID_RESET << 16)); - break; - - case ABORT: - ESPLOG(("device abort successful\n")); - esp_advance_phase(sp, in_abortone); - esp_done(esp, (DID_ABORT << 16)); - break; - - }; - return do_intr_end; -} - -static int esp_enter_msgout(struct esp *esp) -{ - esp_advance_phase(esp->current_SC, in_msgout); - return esp_do_msgout(esp); -} - -static int esp_enter_msgin(struct esp *esp) -{ - esp_advance_phase(esp->current_SC, in_msgin); - return esp_do_msgin(esp); -} - -static int esp_enter_cmd(struct esp *esp) -{ - esp_advance_phase(esp->current_SC, in_cmdbegin); - return esp_do_cmdbegin(esp); -} - -static int esp_enter_badphase(struct esp *esp) -{ - ESPLOG(("esp%d: Bizarre bus phase %2x.\n", esp->esp_id, - esp->sreg & ESP_STAT_PMASK)); - return do_reset_bus; -} - -typedef int (*espfunc_t)(struct esp *); - -static espfunc_t phase_vector[] = { - esp_do_data, /* ESP_DOP */ - esp_do_data, /* ESP_DIP */ - esp_enter_cmd, /* ESP_CMDP */ - esp_enter_status, /* ESP_STATP */ - esp_enter_badphase, /* ESP_STAT_PMSG */ - esp_enter_badphase, /* ESP_STAT_PMSG | ESP_STAT_PIO */ - esp_enter_msgout, /* ESP_MOP */ - esp_enter_msgin, /* ESP_MIP */ -}; - -/* The target has control of the bus and we have to see where it has - * taken us. - */ -static int esp_do_phase_determine(struct esp *esp) -{ - if ((esp->ireg & ESP_INTR_DC) != 0) - return esp_disconnect_amidst_phases(esp); - return phase_vector[esp->sreg & ESP_STAT_PMASK](esp); -} - -/* First interrupt after exec'ing a cmd comes here. 
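phase_vector[] above turns the three phase bits of the status register straight into a table dispatch, with the two reserved encodings routed to a bad-phase handler. A compact standalone analogue; the handlers and PHASE_MASK value are dummies, though the eight-slot layout mirrors ESP_STAT_PMASK indexing:

#include <stdio.h>

struct esp;			/* opaque in this sketch */
typedef int (*espfunc_t)(struct esp *);

#define PHASE_MASK 0x07		/* low three status bits select the phase */

static int do_data(struct esp *e)   { (void)e; puts("data");    return 0; }
static int do_cmd(struct esp *e)    { (void)e; puts("command"); return 0; }
static int do_stat(struct esp *e)   { (void)e; puts("status");  return 0; }
static int do_bad(struct esp *e)    { (void)e; puts("bad");     return 1; }
static int do_msgout(struct esp *e) { (void)e; puts("msgout");  return 0; }
static int do_msgin(struct esp *e)  { (void)e; puts("msgin");   return 0; }

static espfunc_t phase_vector[8] = {
	do_data, do_data,	/* data-out, data-in */
	do_cmd,  do_stat,	/* command, status */
	do_bad,  do_bad,	/* reserved phase encodings */
	do_msgout, do_msgin,	/* message-out, message-in */
};

static int dispatch(struct esp *e, unsigned int sreg)
{
	return phase_vector[sreg & PHASE_MASK](e);
}

int main(void)
{
	return dispatch(NULL, 0x43);	/* masks to 3: the status handler */
}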
*/ -static int esp_select_complete(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - struct esp_device *esp_dev = SCptr->device->hostdata; - int cmd_bytes_sent, fcnt; - - if (esp->erev != fashme) - esp->seqreg = (sbus_readb(esp->eregs + ESP_SSTEP) & ESP_STEP_VBITS); - - if (esp->erev == fashme) - fcnt = esp->hme_fifo_workaround_count; - else - fcnt = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES); - - cmd_bytes_sent = esp_bytes_sent(esp, fcnt); - dma_invalidate(esp); - - /* Let's check to see if a reselect happened - * while we we're trying to select. This must - * be checked first. - */ - if (esp->ireg == (ESP_INTR_RSEL | ESP_INTR_FDONE)) { - esp_reconnect(esp, SCptr); - return esp_do_reconnect(esp); - } - - /* Looks like things worked, we should see a bus service & - * a function complete interrupt at this point. Note we - * are doing a direct comparison because we don't want to - * be fooled into thinking selection was successful if - * ESP_INTR_DC is set, see below. - */ - if (esp->ireg == (ESP_INTR_FDONE | ESP_INTR_BSERV)) { - /* target speaks... */ - esp->targets_present |= (1<<SCptr->device->id); - - /* What if the target ignores the sdtr? */ - if (esp->snip) - esp_dev->sync = 1; - - /* See how far, if at all, we got in getting - * the information out to the target. - */ - switch (esp->seqreg) { - default: - - case ESP_STEP_ASEL: - /* Arbitration won, target selected, but - * we are in some phase which is not command - * phase nor is it message out phase. - * - * XXX We've confused the target, obviously. - * XXX So clear it's state, but we also end - * XXX up clearing everyone elses. That isn't - * XXX so nice. I'd like to just reset this - * XXX target, but if I cannot even get it's - * XXX attention and finish selection to talk - * XXX to it, there is not much more I can do. - * XXX If we have a loaded bus we're going to - * XXX spend the next second or so renegotiating - * XXX for synchronous transfers. - */ - ESPLOG(("esp%d: STEP_ASEL for tgt %d\n", - esp->esp_id, SCptr->device->id)); - - case ESP_STEP_SID: - /* Arbitration won, target selected, went - * to message out phase, sent one message - * byte, then we stopped. ATN is asserted - * on the SCSI bus and the target is still - * there hanging on. This is a legal - * sequence step if we gave the ESP a select - * and stop command. - * - * XXX See above, I could set the borken flag - * XXX in the device struct and retry the - * XXX command. But would that help for - * XXX tagged capable targets? - */ - - case ESP_STEP_NCMD: - /* Arbitration won, target selected, maybe - * sent the one message byte in message out - * phase, but we did not go to command phase - * in the end. Actually, we could have sent - * only some of the message bytes if we tried - * to send out the entire identify and tag - * message using ESP_CMD_SA3. - */ - cmd_bytes_sent = 0; - break; - - case ESP_STEP_PPC: - /* No, not the powerPC pinhead. Arbitration - * won, all message bytes sent if we went to - * message out phase, went to command phase - * but only part of the command was sent. - * - * XXX I've seen this, but usually in conjunction - * XXX with a gross error which appears to have - * XXX occurred between the time I told the - * XXX ESP to arbitrate and when I got the - * XXX interrupt. Could I have misloaded the - * XXX command bytes into the fifo? Actually, - * XXX I most likely missed a phase, and therefore - * XXX went into never never land and didn't even - * XXX know it. That was the old driver though. 
- * XXX What is even more peculiar is that the ESP - * XXX showed the proper function complete and - * XXX bus service bits in the interrupt register. - */ - - case ESP_STEP_FINI4: - case ESP_STEP_FINI5: - case ESP_STEP_FINI6: - case ESP_STEP_FINI7: - /* Account for the identify message */ - if (SCptr->SCp.phase == in_slct_norm) - cmd_bytes_sent -= 1; - }; - - if (esp->erev != fashme) - esp_cmd(esp, ESP_CMD_NULL); - - /* Be careful, we could really get fucked during synchronous - * data transfers if we try to flush the fifo now. - */ - if ((esp->erev != fashme) && /* not a Happy Meal and... */ - !fcnt && /* Fifo is empty and... */ - /* either we are not doing synchronous transfers or... */ - (!esp_dev->sync_max_offset || - /* We are not going into data in phase. */ - ((esp->sreg & ESP_STAT_PMASK) != ESP_DIP))) - esp_cmd(esp, ESP_CMD_FLUSH); /* flush is safe */ - - /* See how far we got if this is not a slow command. */ - if (!esp->esp_slowcmd) { - if (cmd_bytes_sent < 0) - cmd_bytes_sent = 0; - if (cmd_bytes_sent != SCptr->cmd_len) { - /* Crapola, mark it as a slowcmd - * so that we have some chance of - * keeping the command alive with - * good luck. - * - * XXX Actually, if we didn't send it all - * XXX this means either we didn't set things - * XXX up properly (driver bug) or the target - * XXX or the ESP detected parity on one of - * XXX the command bytes. This makes much - * XXX more sense, and therefore this code - * XXX should be changed to send out a - * XXX parity error message or if the status - * XXX register shows no parity error then - * XXX just expect the target to bring the - * XXX bus into message in phase so that it - * XXX can send us the parity error message. - * XXX SCSI sucks... - */ - esp->esp_slowcmd = 1; - esp->esp_scmdp = &(SCptr->cmnd[cmd_bytes_sent]); - esp->esp_scmdleft = (SCptr->cmd_len - cmd_bytes_sent); - } - } - - /* Now figure out where we went. */ - esp_advance_phase(SCptr, in_the_dark); - return esp_do_phase_determine(esp); - } - - /* Did the target even make it? */ - if (esp->ireg == ESP_INTR_DC) { - /* wheee... nobody there or they didn't like - * what we told it to do, clean up. - */ - - /* If anyone is off the bus, but working on - * a command in the background for us, tell - * the ESP to listen for them. - */ - if (esp->disconnected_SC) - esp_cmd(esp, ESP_CMD_ESEL); - - if (((1<<SCptr->device->id) & esp->targets_present) && - esp->seqreg != 0 && - (esp->cur_msgout[0] == EXTENDED_MESSAGE) && - (SCptr->SCp.phase == in_slct_msg || - SCptr->SCp.phase == in_slct_stop)) { - /* shit */ - esp->snip = 0; - ESPLOG(("esp%d: Failed synchronous negotiation for target %d " - "lun %d\n", esp->esp_id, SCptr->device->id, SCptr->device->lun)); - esp_dev->sync_max_offset = 0; - esp_dev->sync_min_period = 0; - esp_dev->sync = 1; /* so we don't negotiate again */ - - /* Run the command again, this time though we - * won't try to negotiate for synchronous transfers. - * - * XXX I'd like to do something like send an - * XXX INITIATOR_ERROR or ABORT message to the - * XXX target to tell it, "Sorry I confused you, - * XXX please come back and I will be nicer next - * XXX time". But that requires having the target - * XXX on the bus, and it has dropped BSY on us. - */ - esp->current_SC = NULL; - esp_advance_phase(SCptr, not_issued); - prepend_SC(&esp->issue_SC, SCptr); - esp_exec_cmd(esp); - return do_intr_end; - } - - /* Ok, this is normal, this is what we see during boot - * or whenever when we are scanning the bus for targets. 
- * But first make sure that is really what is happening. - */ - if (((1<<SCptr->device->id) & esp->targets_present)) { - ESPLOG(("esp%d: Warning, live target %d not responding to " - "selection.\n", esp->esp_id, SCptr->device->id)); - - /* This _CAN_ happen. The SCSI standard states that - * the target is to _not_ respond to selection if - * _it_ detects bad parity on the bus for any reason. - * Therefore, we assume that if we've talked successfully - * to this target before, bad parity is the problem. - */ - esp_done(esp, (DID_PARITY << 16)); - } else { - /* Else, there really isn't anyone there. */ - ESPMISC(("esp: selection failure, maybe nobody there?\n")); - ESPMISC(("esp: target %d lun %d\n", - SCptr->device->id, SCptr->device->lun)); - esp_done(esp, (DID_BAD_TARGET << 16)); - } - return do_intr_end; - } - - ESPLOG(("esp%d: Selection failure.\n", esp->esp_id)); - printk("esp%d: Currently -- ", esp->esp_id); - esp_print_ireg(esp->ireg); printk(" "); - esp_print_statreg(esp->sreg); printk(" "); - esp_print_seqreg(esp->seqreg); printk("\n"); - printk("esp%d: New -- ", esp->esp_id); - esp->sreg = sbus_readb(esp->eregs + ESP_STATUS); - esp->seqreg = sbus_readb(esp->eregs + ESP_SSTEP); - esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT); - esp_print_ireg(esp->ireg); printk(" "); - esp_print_statreg(esp->sreg); printk(" "); - esp_print_seqreg(esp->seqreg); printk("\n"); - ESPLOG(("esp%d: resetting bus\n", esp->esp_id)); - return do_reset_bus; /* ugh... */ -} - -/* Continue reading bytes for msgin phase. */ -static int esp_do_msgincont(struct esp *esp) -{ - if (esp->ireg & ESP_INTR_BSERV) { - /* in the right phase too? */ - if ((esp->sreg & ESP_STAT_PMASK) == ESP_MIP) { - /* phew... */ - esp_cmd(esp, ESP_CMD_TI); - esp_advance_phase(esp->current_SC, in_msgindone); - return do_intr_end; - } - - /* We changed phase but ESP shows bus service, - * in this case it is most likely that we, the - * hacker who has been up for 20hrs straight - * staring at the screen, drowned in coffee - * smelling like retched cigarette ashes - * have miscoded something..... so, try to - * recover as best we can. - */ - ESPLOG(("esp%d: message in mis-carriage.\n", esp->esp_id)); - } - esp_advance_phase(esp->current_SC, in_the_dark); - return do_phase_determine; -} - -static int check_singlebyte_msg(struct esp *esp) -{ - esp->prevmsgin = esp->cur_msgin[0]; - if (esp->cur_msgin[0] & 0x80) { - /* wheee... */ - ESPLOG(("esp%d: target sends identify amidst phases\n", - esp->esp_id)); - esp_advance_phase(esp->current_SC, in_the_dark); - return 0; - } else if (((esp->cur_msgin[0] & 0xf0) == 0x20) || - (esp->cur_msgin[0] == EXTENDED_MESSAGE)) { - esp->msgin_len = 2; - esp_advance_phase(esp->current_SC, in_msgincont); - return 0; - } - esp_advance_phase(esp->current_SC, in_the_dark); - switch (esp->cur_msgin[0]) { - default: - /* We don't want to hear about it. */ - ESPLOG(("esp%d: msg %02x which we don't know about\n", esp->esp_id, - esp->cur_msgin[0])); - return MESSAGE_REJECT; - - case NOP: - ESPLOG(("esp%d: target %d sends a nop\n", esp->esp_id, - esp->current_SC->device->id)); - return 0; - - case RESTORE_POINTERS: - /* In this case we might also have to backup the - * "slow command" pointer. It is rare to get such - * a save/restore pointer sequence so early in the - * bus transition sequences, but cover it. 
- */ - if (esp->esp_slowcmd) { - esp->esp_scmdleft = esp->current_SC->cmd_len; - esp->esp_scmdp = &esp->current_SC->cmnd[0]; - } - esp_restore_pointers(esp, esp->current_SC); - return 0; - - case SAVE_POINTERS: - esp_save_pointers(esp, esp->current_SC); - return 0; - - case COMMAND_COMPLETE: - case DISCONNECT: - /* Freeing the bus, let it go. */ - esp->current_SC->SCp.phase = in_freeing; - return 0; - - case MESSAGE_REJECT: - ESPMISC(("msg reject, ")); - if (esp->prevmsgout == EXTENDED_MESSAGE) { - struct esp_device *esp_dev = esp->current_SC->device->hostdata; - - /* Doesn't look like this target can - * do synchronous or WIDE transfers. - */ - ESPSDTR(("got reject, was trying nego, clearing sync/WIDE\n")); - esp_dev->sync = 1; - esp_dev->wide = 1; - esp_dev->sync_min_period = 0; - esp_dev->sync_max_offset = 0; - return 0; - } else { - ESPMISC(("not sync nego, sending ABORT\n")); - return ABORT; - } - }; -} - -/* Target negotiates for synchronous transfers before we do, this - * is legal although very strange. What is even funnier is that - * the SCSI2 standard specifically recommends against targets doing - * this because so many initiators cannot cope with this occurring. - */ -static int target_with_ants_in_pants(struct esp *esp, - struct scsi_cmnd *SCptr, - struct esp_device *esp_dev) -{ - if (esp_dev->sync || SCptr->device->borken) { - /* sorry, no can do */ - ESPSDTR(("forcing to async, ")); - build_sync_nego_msg(esp, 0, 0); - esp_dev->sync = 1; - esp->snip = 1; - ESPLOG(("esp%d: hoping for msgout\n", esp->esp_id)); - esp_advance_phase(SCptr, in_the_dark); - return EXTENDED_MESSAGE; - } - - /* Ok, we'll check them out... */ - return 0; -} - -static void sync_report(struct esp *esp) -{ - int msg3, msg4; - char *type; - - msg3 = esp->cur_msgin[3]; - msg4 = esp->cur_msgin[4]; - if (msg4) { - int hz = 1000000000 / (msg3 * 4); - int integer = hz / 1000000; - int fraction = (hz - (integer * 1000000)) / 10000; - if ((esp->erev == fashme) && - (esp->config3[esp->current_SC->device->id] & ESP_CONFIG3_EWIDE)) { - type = "FAST-WIDE"; - integer <<= 1; - fraction <<= 1; - } else if ((msg3 * 4) < 200) { - type = "FAST"; - } else { - type = "synchronous"; - } - - /* Do not transform this back into one big printk - * again, it triggers a bug in our sparc64-gcc272 - * sibling call optimization. -DaveM - */ - ESPLOG((KERN_INFO "esp%d: target %d ", - esp->esp_id, esp->current_SC->device->id)); - ESPLOG(("[period %dns offset %d %d.%02dMHz ", - (int) msg3 * 4, (int) msg4, - integer, fraction)); - ESPLOG(("%s SCSI%s]\n", type, - (((msg3 * 4) < 200) ? "-II" : ""))); - } else { - ESPLOG((KERN_INFO "esp%d: target %d asynchronous\n", - esp->esp_id, esp->current_SC->device->id)); - } -} - -static int check_multibyte_msg(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - struct esp_device *esp_dev = SCptr->device->hostdata; - u8 regval = 0; - int message_out = 0; - - ESPSDTR(("chk multibyte msg: ")); - if (esp->cur_msgin[2] == EXTENDED_SDTR) { - int period = esp->cur_msgin[3]; - int offset = esp->cur_msgin[4]; - - ESPSDTR(("is sync nego response, ")); - if (!esp->snip) { - int rval; - - /* Target negotiates first! */ - ESPSDTR(("target jumps the gun, ")); - message_out = EXTENDED_MESSAGE; /* we must respond */ - rval = target_with_ants_in_pants(esp, SCptr, esp_dev); - if (rval) - return rval; - } - - ESPSDTR(("examining sdtr, ")); - - /* Offset cannot be larger than ESP fifo size. 
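sync_report() above turns the SDTR period factor (message byte 3, in units of 4 ns) into a printable rate: 10^9 / (4 * factor) Hz, split into whole MHz plus two fractional digits, with both halves doubled for wide transfers. The same arithmetic, checkable in isolation (report_rate is an illustrative name):

#include <stdio.h>

/* SDTR period factor (units of 4 ns) to MHz, as sync_report()
 * formats it; "wide" doubles the byte rate just as the driver does. */
void report_rate(int factor, int wide)
{
	int hz = 1000000000 / (factor * 4);
	int integer = hz / 1000000;
	int fraction = (hz - integer * 1000000) / 10000;	/* two digits */

	if (wide) {
		integer <<= 1;
		fraction <<= 1;
	}
	printf("period %dns -> %d.%02d MHz%s\n",
	       factor * 4, integer, fraction, wide ? " (wide)" : "");
}

int main(void)
{
	report_rate(12, 0);	/* 48 ns period: 20.83 MHz, fast-20 range */
	report_rate(50, 0);	/* 200 ns period: 5.00 MHz, plain sync */
	return 0;
}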
*/ - if (offset > 15) { - ESPSDTR(("offset too big %2x, ", offset)); - offset = 15; - ESPSDTR(("sending back new offset\n")); - build_sync_nego_msg(esp, period, offset); - return EXTENDED_MESSAGE; - } - - if (offset && period > esp->max_period) { - /* Yeee, async for this slow device. */ - ESPSDTR(("period too long %2x, ", period)); - build_sync_nego_msg(esp, 0, 0); - ESPSDTR(("hoping for msgout\n")); - esp_advance_phase(esp->current_SC, in_the_dark); - return EXTENDED_MESSAGE; - } else if (offset && period < esp->min_period) { - ESPSDTR(("period too short %2x, ", period)); - period = esp->min_period; - if (esp->erev > esp236) - regval = 4; - else - regval = 5; - } else if (offset) { - int tmp; - - ESPSDTR(("period is ok, ")); - tmp = esp->ccycle / 1000; - regval = (((period << 2) + tmp - 1) / tmp); - if (regval && ((esp->erev == fas100a || - esp->erev == fas236 || - esp->erev == fashme))) { - if (period >= 50) - regval--; - } - } - - if (offset) { - u8 bit; - - esp_dev->sync_min_period = (regval & 0x1f); - esp_dev->sync_max_offset = (offset | esp->radelay); - if (esp->erev == fas100a || esp->erev == fas236 || esp->erev == fashme) { - if ((esp->erev == fas100a) || (esp->erev == fashme)) - bit = ESP_CONFIG3_FAST; - else - bit = ESP_CONFIG3_FSCSI; - if (period < 50) { - /* On FAS366, if using fast-20 synchronous transfers - * we need to make sure the REQ/ACK assert/deassert - * control bits are clear. - */ - if (esp->erev == fashme) - esp_dev->sync_max_offset &= ~esp->radelay; - esp->config3[SCptr->device->id] |= bit; - } else { - esp->config3[SCptr->device->id] &= ~bit; - } - esp->prev_cfg3 = esp->config3[SCptr->device->id]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - } - esp->prev_soff = esp_dev->sync_max_offset; - esp->prev_stp = esp_dev->sync_min_period; - sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF); - sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP); - ESPSDTR(("soff=%2x stp=%2x cfg3=%2x\n", - esp_dev->sync_max_offset, - esp_dev->sync_min_period, - esp->config3[SCptr->device->id])); - - esp->snip = 0; - } else if (esp_dev->sync_max_offset) { - u8 bit; - - /* back to async mode */ - ESPSDTR(("unaccaptable sync nego, forcing async\n")); - esp_dev->sync_max_offset = 0; - esp_dev->sync_min_period = 0; - esp->prev_soff = 0; - esp->prev_stp = 0; - sbus_writeb(esp->prev_soff, esp->eregs + ESP_SOFF); - sbus_writeb(esp->prev_stp, esp->eregs + ESP_STP); - if (esp->erev == fas100a || esp->erev == fas236 || esp->erev == fashme) { - if ((esp->erev == fas100a) || (esp->erev == fashme)) - bit = ESP_CONFIG3_FAST; - else - bit = ESP_CONFIG3_FSCSI; - esp->config3[SCptr->device->id] &= ~bit; - esp->prev_cfg3 = esp->config3[SCptr->device->id]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - } - } - - sync_report(esp); - - ESPSDTR(("chk multibyte msg: sync is known, ")); - esp_dev->sync = 1; - - if (message_out) { - ESPLOG(("esp%d: sending sdtr back, hoping for msgout\n", - esp->esp_id)); - build_sync_nego_msg(esp, period, offset); - esp_advance_phase(SCptr, in_the_dark); - return EXTENDED_MESSAGE; - } - - ESPSDTR(("returning zero\n")); - esp_advance_phase(SCptr, in_the_dark); /* ...or else! 
*/ - return 0; - } else if (esp->cur_msgin[2] == EXTENDED_WDTR) { - int size = 8 << esp->cur_msgin[3]; - - esp->wnip = 0; - if (esp->erev != fashme) { - ESPLOG(("esp%d: AIEEE wide msg received and not HME.\n", - esp->esp_id)); - message_out = MESSAGE_REJECT; - } else if (size > 16) { - ESPLOG(("esp%d: AIEEE wide transfer for %d size " - "not supported.\n", esp->esp_id, size)); - message_out = MESSAGE_REJECT; - } else { - /* Things look good; let's see what we got. */ - if (size == 16) { - /* Set config 3 register for this target. */ - esp->config3[SCptr->device->id] |= ESP_CONFIG3_EWIDE; - } else { - /* Just make sure it was one byte sized. */ - if (size != 8) { - ESPLOG(("esp%d: Aieee, wide nego of %d size.\n", - esp->esp_id, size)); - message_out = MESSAGE_REJECT; - goto finish; - } - /* Pure paranoia. */ - esp->config3[SCptr->device->id] &= ~(ESP_CONFIG3_EWIDE); - } - esp->prev_cfg3 = esp->config3[SCptr->device->id]; - sbus_writeb(esp->prev_cfg3, esp->eregs + ESP_CFG3); - - /* Regardless, next try for sync transfers. */ - build_sync_nego_msg(esp, esp->sync_defp, 15); - esp_dev->sync = 1; - esp->snip = 1; - message_out = EXTENDED_MESSAGE; - } - } else if (esp->cur_msgin[2] == EXTENDED_MODIFY_DATA_POINTER) { - ESPLOG(("esp%d: rejecting modify data ptr msg\n", esp->esp_id)); - message_out = MESSAGE_REJECT; - } -finish: - esp_advance_phase(SCptr, in_the_dark); - return message_out; -} - -static int esp_do_msgindone(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - int message_out = 0, it = 0, rval; - - rval = skipahead1(esp, SCptr, in_msgin, in_msgindone); - if (rval) - return rval; - if (SCptr->SCp.sent_command != in_status) { - if (!(esp->ireg & ESP_INTR_DC)) { - if (esp->msgin_len && (esp->sreg & ESP_STAT_PERR)) { - message_out = MSG_PARITY_ERROR; - esp_cmd(esp, ESP_CMD_FLUSH); - } else if (esp->erev != fashme && - (it = (sbus_readb(esp->eregs + ESP_FFLAGS) & ESP_FF_FBYTES)) != 1) { - /* We certainly dropped the ball somewhere. 
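The WDTR branch above decodes the negotiated width as 8 << exponent bits, accepts only 8 or 16, programs ESP_CONFIG3_EWIDE for the wide case, and answers anything else with MESSAGE_REJECT. That decision, distilled (wdtr_wide_enable is an illustrative name):

enum { WIDTH_REJECT = -1 };

/* Decode a WDTR width exponent as check_multibyte_msg() does:
 * 1 enables 16-bit wide transfers, 0 keeps plain 8-bit, and
 * WIDTH_REJECT means answer with MESSAGE_REJECT. */
int wdtr_wide_enable(int exponent)
{
	int size = 8 << exponent;	/* bits per data transfer */

	if (size == 16)
		return 1;		/* set ESP_CONFIG3_EWIDE */
	if (size == 8)
		return 0;		/* clear it, pure paranoia */
	return WIDTH_REJECT;		/* FAS366 cannot do this width */
}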
*/ - message_out = INITIATOR_ERROR; - esp_cmd(esp, ESP_CMD_FLUSH); - } else if (!esp->msgin_len) { - if (esp->erev == fashme) - it = esp->hme_fifo_workaround_buffer[0]; - else - it = sbus_readb(esp->eregs + ESP_FDATA); - esp_advance_phase(SCptr, in_msgincont); - } else { - /* it is ok and we want it */ - if (esp->erev == fashme) - it = esp->cur_msgin[esp->msgin_ctr] = - esp->hme_fifo_workaround_buffer[0]; - else - it = esp->cur_msgin[esp->msgin_ctr] = - sbus_readb(esp->eregs + ESP_FDATA); - esp->msgin_ctr++; - } - } else { - esp_advance_phase(SCptr, in_the_dark); - return do_work_bus; - } - } else { - it = esp->cur_msgin[0]; - } - if (!message_out && esp->msgin_len) { - if (esp->msgin_ctr < esp->msgin_len) { - esp_advance_phase(SCptr, in_msgincont); - } else if (esp->msgin_len == 1) { - message_out = check_singlebyte_msg(esp); - } else if (esp->msgin_len == 2) { - if (esp->cur_msgin[0] == EXTENDED_MESSAGE) { - if ((it + 2) >= 15) { - message_out = MESSAGE_REJECT; - } else { - esp->msgin_len = (it + 2); - esp_advance_phase(SCptr, in_msgincont); - } - } else { - message_out = MESSAGE_REJECT; /* foo on you */ - } - } else { - message_out = check_multibyte_msg(esp); - } - } - if (message_out < 0) { - return -message_out; - } else if (message_out) { - if (((message_out != 1) && - ((message_out < 0x20) || (message_out & 0x80)))) - esp->msgout_len = 1; - esp->cur_msgout[0] = message_out; - esp_cmd(esp, ESP_CMD_SATN); - esp_advance_phase(SCptr, in_the_dark); - esp->msgin_len = 0; - } - esp->sreg = sbus_readb(esp->eregs + ESP_STATUS); - esp->sreg &= ~(ESP_STAT_INTR); - if ((esp->sreg & (ESP_STAT_PMSG|ESP_STAT_PCD)) == (ESP_STAT_PMSG|ESP_STAT_PCD)) - esp_cmd(esp, ESP_CMD_MOK); - if ((SCptr->SCp.sent_command == in_msgindone) && - (SCptr->SCp.phase == in_freeing)) - return esp_do_freebus(esp); - return do_intr_end; -} - -static int esp_do_cmdbegin(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - - esp_advance_phase(SCptr, in_cmdend); - if (esp->erev == fashme) { - u32 tmp = sbus_readl(esp->dregs + DMA_CSR); - int i; - - for (i = 0; i < esp->esp_scmdleft; i++) - esp->esp_command[i] = *esp->esp_scmdp++; - esp->esp_scmdleft = 0; - esp_cmd(esp, ESP_CMD_FLUSH); - esp_setcount(esp->eregs, i, 1); - esp_cmd(esp, (ESP_CMD_DMA | ESP_CMD_TI)); - tmp |= (DMA_SCSI_DISAB | DMA_ENABLE); - tmp &= ~(DMA_ST_WRITE); - sbus_writel(i, esp->dregs + DMA_COUNT); - sbus_writel(esp->esp_command_dvma, esp->dregs + DMA_ADDR); - sbus_writel(tmp, esp->dregs + DMA_CSR); - } else { - u8 tmp; - - esp_cmd(esp, ESP_CMD_FLUSH); - tmp = *esp->esp_scmdp++; - esp->esp_scmdleft--; - sbus_writeb(tmp, esp->eregs + ESP_FDATA); - esp_cmd(esp, ESP_CMD_TI); - } - return do_intr_end; -} - -static int esp_do_cmddone(struct esp *esp) -{ - if (esp->erev == fashme) - dma_invalidate(esp); - else - esp_cmd(esp, ESP_CMD_NULL); - - if (esp->ireg & ESP_INTR_BSERV) { - esp_advance_phase(esp->current_SC, in_the_dark); - return esp_do_phase_determine(esp); - } - - ESPLOG(("esp%d: in do_cmddone() but didn't get BSERV interrupt.\n", - esp->esp_id)); - return do_reset_bus; -} - -static int esp_do_msgout(struct esp *esp) -{ - esp_cmd(esp, ESP_CMD_FLUSH); - switch (esp->msgout_len) { - case 1: - if (esp->erev == fashme) - hme_fifo_push(esp, &esp->cur_msgout[0], 1); - else - sbus_writeb(esp->cur_msgout[0], esp->eregs + ESP_FDATA); - - esp_cmd(esp, ESP_CMD_TI); - break; - - case 2: - esp->esp_command[0] = esp->cur_msgout[0]; - esp->esp_command[1] = esp->cur_msgout[1]; - - if (esp->erev == fashme) { - hme_fifo_push(esp, &esp->cur_msgout[0], 2); - 
esp_cmd(esp, ESP_CMD_TI); - } else { - dma_setup(esp, esp->esp_command_dvma, 2, 0); - esp_setcount(esp->eregs, 2, 0); - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - } - break; - - case 4: - esp->esp_command[0] = esp->cur_msgout[0]; - esp->esp_command[1] = esp->cur_msgout[1]; - esp->esp_command[2] = esp->cur_msgout[2]; - esp->esp_command[3] = esp->cur_msgout[3]; - esp->snip = 1; - - if (esp->erev == fashme) { - hme_fifo_push(esp, &esp->cur_msgout[0], 4); - esp_cmd(esp, ESP_CMD_TI); - } else { - dma_setup(esp, esp->esp_command_dvma, 4, 0); - esp_setcount(esp->eregs, 4, 0); - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - } - break; - - case 5: - esp->esp_command[0] = esp->cur_msgout[0]; - esp->esp_command[1] = esp->cur_msgout[1]; - esp->esp_command[2] = esp->cur_msgout[2]; - esp->esp_command[3] = esp->cur_msgout[3]; - esp->esp_command[4] = esp->cur_msgout[4]; - esp->snip = 1; - - if (esp->erev == fashme) { - hme_fifo_push(esp, &esp->cur_msgout[0], 5); - esp_cmd(esp, ESP_CMD_TI); - } else { - dma_setup(esp, esp->esp_command_dvma, 5, 0); - esp_setcount(esp->eregs, 5, 0); - esp_cmd(esp, ESP_CMD_DMA | ESP_CMD_TI); - } - break; - - default: - /* whoops */ - ESPMISC(("bogus msgout sending NOP\n")); - esp->cur_msgout[0] = NOP; - - if (esp->erev == fashme) { - hme_fifo_push(esp, &esp->cur_msgout[0], 1); - } else { - sbus_writeb(esp->cur_msgout[0], esp->eregs + ESP_FDATA); - } - - esp->msgout_len = 1; - esp_cmd(esp, ESP_CMD_TI); - break; - }; - - esp_advance_phase(esp->current_SC, in_msgoutdone); - return do_intr_end; -} - -static int esp_do_msgoutdone(struct esp *esp) -{ - if (esp->msgout_len > 1) { - /* XXX HME/FAS ATN deassert workaround required, - * XXX no DMA flushing, only possible ESP_CMD_FLUSH - * XXX to kill the fifo. - */ - if (esp->erev != fashme) { - u32 tmp; - - while ((tmp = sbus_readl(esp->dregs + DMA_CSR)) & DMA_PEND_READ) - udelay(1); - tmp &= ~DMA_ENABLE; - sbus_writel(tmp, esp->dregs + DMA_CSR); - dma_invalidate(esp); - } else { - esp_cmd(esp, ESP_CMD_FLUSH); - } - } - if (!(esp->ireg & ESP_INTR_DC)) { - if (esp->erev != fashme) - esp_cmd(esp, ESP_CMD_NULL); - switch (esp->sreg & ESP_STAT_PMASK) { - case ESP_MOP: - /* whoops, parity error */ - ESPLOG(("esp%d: still in msgout, parity error assumed\n", - esp->esp_id)); - if (esp->msgout_len > 1) - esp_cmd(esp, ESP_CMD_SATN); - esp_advance_phase(esp->current_SC, in_msgout); - return do_work_bus; - - case ESP_DIP: - break; - - default: - /* Happy Meal fifo is touchy... */ - if ((esp->erev != fashme) && - !fcount(esp) && - !(((struct esp_device *)esp->current_SC->device->hostdata)->sync_max_offset)) - esp_cmd(esp, ESP_CMD_FLUSH); - break; - - }; - } else { - ESPLOG(("esp%d: disconnect, resetting bus\n", esp->esp_id)); - return do_reset_bus; - } - - /* If we sent out a synchronous negotiation message, update - * our state. - */ - if (esp->cur_msgout[2] == EXTENDED_MESSAGE && - esp->cur_msgout[4] == EXTENDED_SDTR) { - esp->snip = 1; /* anal retentiveness... 
*/ - } - - esp->prevmsgout = esp->cur_msgout[0]; - esp->msgout_len = 0; - esp_advance_phase(esp->current_SC, in_the_dark); - return esp_do_phase_determine(esp); -} - -static int esp_bus_unexpected(struct esp *esp) -{ - ESPLOG(("esp%d: command in weird state %2x\n", - esp->esp_id, esp->current_SC->SCp.phase)); - return do_reset_bus; -} - -static espfunc_t bus_vector[] = { - esp_do_data_finale, - esp_do_data_finale, - esp_bus_unexpected, - esp_do_msgin, - esp_do_msgincont, - esp_do_msgindone, - esp_do_msgout, - esp_do_msgoutdone, - esp_do_cmdbegin, - esp_do_cmddone, - esp_do_status, - esp_do_freebus, - esp_do_phase_determine, - esp_bus_unexpected, - esp_bus_unexpected, - esp_bus_unexpected, -}; - -/* This is the second tier in our dual-level SCSI state machine. */ -static int esp_work_bus(struct esp *esp) -{ - struct scsi_cmnd *SCptr = esp->current_SC; - unsigned int phase; - - ESPBUS(("esp_work_bus: ")); - if (!SCptr) { - ESPBUS(("reconnect\n")); - return esp_do_reconnect(esp); - } - phase = SCptr->SCp.phase; - if ((phase & 0xf0) == in_phases_mask) - return bus_vector[(phase & 0x0f)](esp); - else if ((phase & 0xf0) == in_slct_mask) - return esp_select_complete(esp); - else - return esp_bus_unexpected(esp); -} - -static espfunc_t isvc_vector[] = { - NULL, - esp_do_phase_determine, - esp_do_resetbus, - esp_finish_reset, - esp_work_bus -}; - -/* Main interrupt handler for an esp adapter. */ -static void esp_handle(struct esp *esp) -{ - struct scsi_cmnd *SCptr; - int what_next = do_intr_end; - - SCptr = esp->current_SC; - - /* Check for errors. */ - esp->sreg = sbus_readb(esp->eregs + ESP_STATUS); - esp->sreg &= (~ESP_STAT_INTR); - if (esp->erev == fashme) { - esp->sreg2 = sbus_readb(esp->eregs + ESP_STATUS2); - esp->seqreg = (sbus_readb(esp->eregs + ESP_SSTEP) & ESP_STEP_VBITS); - } - - if (esp->sreg & (ESP_STAT_SPAM)) { - /* Gross error, could be due to one of: - * - * - top of fifo overwritten, could be because - * we tried to do a synchronous transfer with - * an offset greater than ESP fifo size - * - * - top of command register overwritten - * - * - DMA setup to go in one direction, SCSI - * bus points in the other, whoops - * - * - weird phase change during asynchronous - * data phase while we are initiator - */ - ESPLOG(("esp%d: Gross error sreg=%2x\n", esp->esp_id, esp->sreg)); - - /* If a command is live on the bus we cannot safely - * reset the bus, so we'll just let the pieces fall - * where they may. Here we are hoping that the - * target will be able to cleanly go away soon - * so we can safely reset things. - */ - if (!SCptr) { - ESPLOG(("esp%d: No current cmd during gross error, " - "resetting bus\n", esp->esp_id)); - what_next = do_reset_bus; - goto state_machine; - } - } - - if (sbus_readl(esp->dregs + DMA_CSR) & DMA_HNDL_ERROR) { - /* A DMA gate array error. Here we must - * be seeing one of two things. Either the - * virtual to physical address translation - * on the SBUS could not occur, else the - * translation it did get pointed to a bogus - * page. Ho hum... - */ - ESPLOG(("esp%d: DMA error %08x\n", esp->esp_id, - sbus_readl(esp->dregs + DMA_CSR))); - - /* DMA gate array itself must be reset to clear the - * error condition. - */ - esp_reset_dma(esp); - - what_next = do_reset_bus; - goto state_machine; - } - - esp->ireg = sbus_readb(esp->eregs + ESP_INTRPT); /* Unlatch intr reg */ - - if (esp->erev == fashme) { - /* This chip is really losing. 
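esp_work_bus() above leans on the phase encoding: the high nibble names a class (bus-service phases versus selection), and the low nibble indexes that class's handler table. A self-contained rendering of the scheme; the class constants and handlers are illustrative, not the driver's in_phases_mask/in_slct_mask values, and the range designator is the GNU C extension kernel code routinely uses:

#include <stdio.h>

#define CLASS(p)	((p) & 0xf0)	/* high nibble: phase class */
#define INDEX(p)	((p) & 0x0f)	/* low nibble: handler slot */
#define CLS_BUS		0x10		/* illustrative class codes */
#define CLS_SLCT	0x20

typedef int (*handler_t)(void);

static int on_data(void)   { puts("data phase");   return 0; }
static int on_status(void) { puts("status phase"); return 0; }
static int on_select(void) { puts("select done");  return 0; }
static int on_weird(void)  { puts("unexpected");   return 1; }

static handler_t bus_vector[16] = {
	on_data, on_status,
	[2 ... 15] = on_weird,	/* GNU range designator */
};

static int work_bus(unsigned int phase)
{
	if (CLASS(phase) == CLS_BUS)
		return bus_vector[INDEX(phase)]();
	if (CLASS(phase) == CLS_SLCT)
		return on_select();
	return on_weird();
}

int main(void)
{
	return work_bus(CLS_BUS | 0x01);	/* dispatches on_status */
}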
*/ - ESPHME(("HME[")); - - ESPHME(("sreg2=%02x,", esp->sreg2)); - /* Must latch fifo before reading the interrupt - * register else garbage ends up in the FIFO - * which confuses the driver utterly. - */ - if (!(esp->sreg2 & ESP_STAT2_FEMPTY) || - (esp->sreg2 & ESP_STAT2_F1BYTE)) { - ESPHME(("fifo_workaround]")); - hme_fifo_read(esp); - } else { - ESPHME(("no_fifo_workaround]")); - } - } - - /* No current cmd is only valid at this point when there are - * commands off the bus or we are trying a reset. - */ - if (!SCptr && !esp->disconnected_SC && !(esp->ireg & ESP_INTR_SR)) { - /* Panic is safe, since current_SC is null. */ - ESPLOG(("esp%d: no command in esp_handle()\n", esp->esp_id)); - panic("esp_handle: current_SC == penguin within interrupt!"); - } - - if (esp->ireg & (ESP_INTR_IC)) { - /* Illegal command fed to ESP. Outside of obvious - * software bugs that could cause this, there is - * a condition with esp100 where we can confuse the - * ESP into an erroneous illegal command interrupt - * because it does not scrape the FIFO properly - * for reselection. See esp100_reconnect_hwbug() - * to see how we try very hard to avoid this. - */ - ESPLOG(("esp%d: invalid command\n", esp->esp_id)); - - esp_dump_state(esp); - - if (SCptr != NULL) { - /* Devices with very buggy firmware can drop BSY - * during a scatter list interrupt when using sync - * mode transfers. We continue the transfer as - * expected, the target drops the bus, the ESP - * gets confused, and we get a illegal command - * interrupt because the bus is in the disconnected - * state now and ESP_CMD_TI is only allowed when - * a nexus is alive on the bus. - */ - ESPLOG(("esp%d: Forcing async and disabling disconnect for " - "target %d\n", esp->esp_id, SCptr->device->id)); - SCptr->device->borken = 1; /* foo on you */ - } - - what_next = do_reset_bus; - } else if (!(esp->ireg & ~(ESP_INTR_FDONE | ESP_INTR_BSERV | ESP_INTR_DC))) { - if (SCptr) { - unsigned int phase = SCptr->SCp.phase; - - if (phase & in_phases_mask) { - what_next = esp_work_bus(esp); - } else if (phase & in_slct_mask) { - what_next = esp_select_complete(esp); - } else { - ESPLOG(("esp%d: interrupt for no good reason...\n", - esp->esp_id)); - what_next = do_intr_end; - } - } else { - ESPLOG(("esp%d: BSERV or FDONE or DC while SCptr==NULL\n", - esp->esp_id)); - what_next = do_reset_bus; - } - } else if (esp->ireg & ESP_INTR_SR) { - ESPLOG(("esp%d: SCSI bus reset interrupt\n", esp->esp_id)); - what_next = do_reset_complete; - } else if (esp->ireg & (ESP_INTR_S | ESP_INTR_SATN)) { - ESPLOG(("esp%d: AIEEE we have been selected by another initiator!\n", - esp->esp_id)); - what_next = do_reset_bus; - } else if (esp->ireg & ESP_INTR_RSEL) { - if (SCptr == NULL) { - /* This is ok. */ - what_next = esp_do_reconnect(esp); - } else if (SCptr->SCp.phase & in_slct_mask) { - /* Only selection code knows how to clean - * up properly. - */ - ESPDISC(("Reselected during selection attempt\n")); - what_next = esp_select_complete(esp); - } else { - ESPLOG(("esp%d: Reselected while bus is busy\n", - esp->esp_id)); - what_next = do_reset_bus; - } - } - - /* This is tier-one in our dual level SCSI state machine. 
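For reference, the dispatch pattern the removed driver used here: esp_handle() reduces every interrupt to a what_next action code, and the state_machine: loop just below keeps indexing isvc_vector until an action returns do_intr_end. A minimal standalone sketch of that table-driven loop, with hypothetical action names and nothing assumed beyond the C standard library:

#include <stdio.h>

/* Hypothetical action codes standing in for do_phase_determine..do_intr_end. */
enum { DO_PHASE = 1, DO_RESET_BUS, DO_FINISH, DO_INTR_END };

static int do_phase(void)  { puts("phase");  return DO_FINISH; }
static int do_reset(void)  { puts("reset");  return DO_INTR_END; }
static int do_finish(void) { puts("finish"); return DO_INTR_END; }

/* Entry 0 stays NULL, as in isvc_vector: valid codes start above zero. */
static int (*vec[])(void) = { NULL, do_phase, do_reset, do_finish };

int main(void)
{
	int what_next = DO_PHASE;

	/* Tier-one loop: run actions until one reports the IRQ handled. */
	while (what_next != DO_INTR_END) {
		if (what_next >= DO_PHASE && what_next < DO_INTR_END)
			what_next = vec[what_next]();
		else
			what_next = DO_RESET_BUS;	/* state lost: reset */
	}
	return 0;
}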
*/ -state_machine: - while (what_next != do_intr_end) { - if (what_next >= do_phase_determine && - what_next < do_intr_end) { - what_next = isvc_vector[what_next](esp); - } else { - /* state is completely lost ;-( */ - ESPLOG(("esp%d: interrupt engine loses state, resetting bus\n", - esp->esp_id)); - what_next = do_reset_bus; - } - } -} - -/* Service only the ESP described by dev_id. */ -static irqreturn_t esp_intr(int irq, void *dev_id) -{ - struct esp *esp = dev_id; - unsigned long flags; - - spin_lock_irqsave(esp->ehost->host_lock, flags); - if (ESP_IRQ_P(esp->dregs)) { - ESP_INTSOFF(esp->dregs); - - ESPIRQ(("I[%d:%d](", smp_processor_id(), esp->esp_id)); - esp_handle(esp); - ESPIRQ((")")); - - ESP_INTSON(esp->dregs); - } - spin_unlock_irqrestore(esp->ehost->host_lock, flags); - - return IRQ_HANDLED; -} - -static int esp_slave_alloc(struct scsi_device *SDptr) -{ - struct esp_device *esp_dev = - kmalloc(sizeof(struct esp_device), GFP_ATOMIC); - - if (!esp_dev) - return -ENOMEM; - memset(esp_dev, 0, sizeof(struct esp_device)); - SDptr->hostdata = esp_dev; - return 0; -} - -static void esp_slave_destroy(struct scsi_device *SDptr) -{ - struct esp *esp = (struct esp *) SDptr->host->hostdata; - - esp->targets_present &= ~(1 << SDptr->id); - kfree(SDptr->hostdata); - SDptr->hostdata = NULL; -} - -static struct scsi_host_template esp_template = { - .module = THIS_MODULE, - .name = "esp", - .info = esp_info, - .slave_alloc = esp_slave_alloc, - .slave_destroy = esp_slave_destroy, - .queuecommand = esp_queue, - .eh_abort_handler = esp_abort, - .eh_bus_reset_handler = esp_reset, - .can_queue = 7, - .this_id = 7, - .sg_tablesize = SG_ALL, - .cmd_per_lun = 1, - .use_clustering = ENABLE_CLUSTERING, - .proc_name = "esp", - .proc_info = esp_proc_info, -}; - -#ifndef CONFIG_SUN4 -static struct of_device_id esp_match[] = { - { - .name = "SUNW,esp", - .data = &esp_template, - }, - { - .name = "SUNW,fas", - .data = &esp_template, - }, - { - .name = "esp", - .data = &esp_template, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, esp_match); - -static struct of_platform_driver esp_sbus_driver = { - .name = "esp", - .match_table = esp_match, - .probe = esp_sbus_probe, - .remove = __devexit_p(esp_sbus_remove), -}; -#endif - -static int __init esp_init(void) -{ -#ifdef CONFIG_SUN4 - return esp_sun4_probe(&esp_template); -#else - return of_register_driver(&esp_sbus_driver, &sbus_bus_type); -#endif -} - -static void __exit esp_exit(void) -{ -#ifdef CONFIG_SUN4 - esp_sun4_remove(); -#else - of_unregister_driver(&esp_sbus_driver); -#endif -} - -MODULE_DESCRIPTION("ESP Sun SCSI driver"); -MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DRV_VERSION); - -module_init(esp_init); -module_exit(esp_exit); diff --git a/drivers/scsi/esp.h b/drivers/scsi/esp.h deleted file mode 100644 index a98cda9121f..00000000000 --- a/drivers/scsi/esp.h +++ /dev/null @@ -1,406 +0,0 @@ -/* $Id: esp.h,v 1.29 2001/12/11 04:55:47 davem Exp $ - * esp.h: Defines and structures for the Sparc ESP (Enhanced SCSI - * Processor) driver under Linux. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ - -#ifndef _SPARC_ESP_H -#define _SPARC_ESP_H - -/* For dvma controller register definitions. */ -#include <asm/dma.h> - -/* The ESP SCSI controllers have their register sets in three - * "classes": - * - * 1) Registers which are both read and write. - * 2) Registers which are read only. - * 3) Registers which are write only. - * - * Yet, they all live within the same IO space. 
- */ - -/* All the ESP registers are one byte each and are accessed longwords - * apart with a big-endian ordering to the bytes. - */ - /* Access Description Offset */ -#define ESP_TCLOW 0x00UL /* rw Low bits of the transfer count 0x00 */ -#define ESP_TCMED 0x04UL /* rw Mid bits of the transfer count 0x04 */ -#define ESP_FDATA 0x08UL /* rw FIFO data bits 0x08 */ -#define ESP_CMD 0x0cUL /* rw SCSI command bits 0x0c */ -#define ESP_STATUS 0x10UL /* ro ESP status register 0x10 */ -#define ESP_BUSID ESP_STATUS /* wo Bus ID for select/reselect 0x10 */ -#define ESP_INTRPT 0x14UL /* ro Kind of interrupt 0x14 */ -#define ESP_TIMEO ESP_INTRPT /* wo Timeout value for select/resel 0x14 */ -#define ESP_SSTEP 0x18UL /* ro Sequence step register 0x18 */ -#define ESP_STP ESP_SSTEP /* wo Transfer period per sync 0x18 */ -#define ESP_FFLAGS 0x1cUL /* ro Bits of current FIFO info 0x1c */ -#define ESP_SOFF ESP_FFLAGS /* wo Sync offset 0x1c */ -#define ESP_CFG1 0x20UL /* rw First configuration register 0x20 */ -#define ESP_CFACT 0x24UL /* wo Clock conversion factor 0x24 */ -#define ESP_STATUS2 ESP_CFACT /* ro HME status2 register 0x24 */ -#define ESP_CTEST 0x28UL /* wo Chip test register 0x28 */ -#define ESP_CFG2 0x2cUL /* rw Second configuration register 0x2c */ -#define ESP_CFG3 0x30UL /* rw Third configuration register 0x30 */ -#define ESP_TCHI 0x38UL /* rw High bits of transfer count 0x38 */ -#define ESP_UID ESP_TCHI /* ro Unique ID code 0x38 */ -#define FAS_RLO ESP_TCHI /* rw HME extended counter 0x38 */ -#define ESP_FGRND 0x3cUL /* rw Data base for fifo 0x3c */ -#define FAS_RHI ESP_FGRND /* rw HME extended counter 0x3c */ -#define ESP_REG_SIZE 0x40UL - -/* Various revisions of the ESP board. */ -enum esp_rev { - esp100 = 0x00, /* NCR53C90 - very broken */ - esp100a = 0x01, /* NCR53C90A */ - esp236 = 0x02, - fas236 = 0x03, - fas100a = 0x04, - fast = 0x05, - fashme = 0x06, - espunknown = 0x07 -}; - -/* We allocate one of these for each scsi device and attach it to - * SDptr->hostdata for use in the driver - */ -struct esp_device { - unsigned char sync_min_period; - unsigned char sync_max_offset; - unsigned sync:1; - unsigned wide:1; - unsigned disconnect:1; -}; - -struct scsi_cmnd; - -/* We get one of these for each ESP probed. */ -struct esp { - void __iomem *eregs; /* ESP controller registers */ - void __iomem *dregs; /* DMA controller registers */ - struct sbus_dma *dma; /* DMA controller sw state */ - struct Scsi_Host *ehost; /* Backpointer to SCSI Host */ - struct sbus_dev *sdev; /* Pointer to SBus entry */ - - /* ESP Configuration Registers */ - u8 config1; /* Copy of the 1st config register */ - u8 config2; /* Copy of the 2nd config register */ - u8 config3[16]; /* Copy of the 3rd config register */ - - /* The current command we are sending to the ESP chip. This esp_command - * ptr needs to be mapped in DVMA area so we can send commands and read - * from the ESP fifo without burning precious CPU cycles. Programmed I/O - * sucks when we have the DVMA to do it for us. The ESP is stupid and will - * only send out 6, 10, and 12 byte SCSI commands, others we need to send - * one byte at a time. esp_slowcmd being set says that we are doing one - * of the command types ESP doesn't understand, esp_scmdp keeps track of - * which byte we are sending, esp_scmdleft says how many bytes to go. 
- */ - volatile u8 *esp_command; /* Location of command (CPU view) */ - __u32 esp_command_dvma;/* Location of command (DVMA view) */ - unsigned char esp_clen; /* Length of this command */ - unsigned char esp_slowcmd; - unsigned char *esp_scmdp; - unsigned char esp_scmdleft; - - /* The following are used to determine the cause of an IRQ. Upon every - * IRQ entry we synchronize these with the hardware registers. - */ - u8 ireg; /* Copy of ESP interrupt register */ - u8 sreg; /* Copy of ESP status register */ - u8 seqreg; /* Copy of ESP sequence step register */ - u8 sreg2; /* Copy of HME status2 register */ - - /* To save register writes to the ESP, which can be expensive, we - * keep track of the previous value that various registers had for - * the last target we connected to. If they are the same for the - * current target, we skip the register writes as they are not needed. - */ - u8 prev_soff, prev_stp; - u8 prev_cfg3, __cache_pad; - - /* We also keep a cache of the previous FAS/HME DMA CSR register value. */ - u32 prev_hme_dmacsr; - - /* The HME is the biggest piece of shit I have ever seen. */ - u8 hme_fifo_workaround_buffer[16 * 2]; - u8 hme_fifo_workaround_count; - - /* For each target we keep track of save/restore data - * pointer information. This needs to be updated majorly - * when we add support for tagged queueing. -DaveM - */ - struct esp_pointers { - char *saved_ptr; - struct scatterlist *saved_buffer; - int saved_this_residual; - int saved_buffers_residual; - } data_pointers[16] /*XXX [MAX_TAGS_PER_TARGET]*/; - - /* Clock periods, frequencies, synchronization, etc. */ - unsigned int cfreq; /* Clock frequency in HZ */ - unsigned int cfact; /* Clock conversion factor */ - unsigned int raw_cfact; /* Raw copy from probing */ - unsigned int ccycle; /* One ESP clock cycle */ - unsigned int ctick; /* One ESP clock time */ - unsigned int radelay; /* FAST chip req/ack delay */ - unsigned int neg_defp; /* Default negotiation period */ - unsigned int sync_defp; /* Default sync transfer period */ - unsigned int max_period; /* longest our period can be */ - unsigned int min_period; /* shortest period we can withstand */ - - struct esp *next; /* Next ESP we probed or NULL */ - char prom_name[64]; /* Name of ESP device from prom */ - int prom_node; /* Prom node where ESP found */ - int esp_id; /* Unique per-ESP ID number */ - - /* For slow to medium speed input clock rates we shoot for 5mb/s, - * but for high input clock rates we try to do 10mb/s although I - * don't think a transfer can even run that fast with an ESP even - * with DMA2 scatter gather pipelining. - */ -#define SYNC_DEFP_SLOW 0x32 /* 5mb/s */ -#define SYNC_DEFP_FAST 0x19 /* 10mb/s */ - - unsigned int snip; /* Sync. negotiation in progress */ - unsigned int wnip; /* WIDE negotiation in progress */ - unsigned int targets_present;/* targets spoken to before */ - - int current_transfer_size; /* Set at beginning of data dma */ - - u8 espcmdlog[32]; /* Log of current esp cmds sent. */ - u8 espcmdent; /* Current entry in esp cmd log. */ - - /* Misc. info about this ESP */ - enum esp_rev erev; /* ESP revision */ - int irq; /* SBus IRQ for this ESP */ - int scsi_id; /* Who am I as initiator? */ - int scsi_id_mask; /* Bitmask of 'me'. */ - int diff; /* Differential SCSI bus? */ - int bursts; /* Burst sizes our DVMA supports */ - - /* Our command queues, only one cmd lives in the current_SC queue. 
*/ - struct scsi_cmnd *issue_SC; /* Commands to be issued */ - struct scsi_cmnd *current_SC; /* Who is currently working the bus */ - struct scsi_cmnd *disconnected_SC;/* Commands disconnected from the bus */ - - /* Message goo */ - u8 cur_msgout[16]; - u8 cur_msgin[16]; - u8 prevmsgout, prevmsgin; - u8 msgout_len, msgin_len; - u8 msgout_ctr, msgin_ctr; - - /* States that we cannot keep in the per cmd structure because they - * cannot be assosciated with any specific command. - */ - u8 resetting_bus; - wait_queue_head_t reset_queue; -}; - -/* Bitfield meanings for the above registers. */ - -/* ESP config reg 1, read-write, found on all ESP chips */ -#define ESP_CONFIG1_ID 0x07 /* My BUS ID bits */ -#define ESP_CONFIG1_CHTEST 0x08 /* Enable ESP chip tests */ -#define ESP_CONFIG1_PENABLE 0x10 /* Enable parity checks */ -#define ESP_CONFIG1_PARTEST 0x20 /* Parity test mode enabled? */ -#define ESP_CONFIG1_SRRDISAB 0x40 /* Disable SCSI reset reports */ -#define ESP_CONFIG1_SLCABLE 0x80 /* Enable slow cable mode */ - -/* ESP config reg 2, read-write, found only on esp100a+esp200+esp236 chips */ -#define ESP_CONFIG2_DMAPARITY 0x01 /* enable DMA Parity (200,236) */ -#define ESP_CONFIG2_REGPARITY 0x02 /* enable reg Parity (200,236) */ -#define ESP_CONFIG2_BADPARITY 0x04 /* Bad parity target abort */ -#define ESP_CONFIG2_SCSI2ENAB 0x08 /* Enable SCSI-2 features (tmode only) */ -#define ESP_CONFIG2_HI 0x10 /* High Impedance DREQ ??? */ -#define ESP_CONFIG2_HMEFENAB 0x10 /* HME features enable */ -#define ESP_CONFIG2_BCM 0x20 /* Enable byte-ctrl (236) */ -#define ESP_CONFIG2_DISPINT 0x20 /* Disable pause irq (hme) */ -#define ESP_CONFIG2_FENAB 0x40 /* Enable features (fas100,esp216) */ -#define ESP_CONFIG2_SPL 0x40 /* Enable status-phase latch (esp236) */ -#define ESP_CONFIG2_MKDONE 0x40 /* HME magic feature */ -#define ESP_CONFIG2_HME32 0x80 /* HME 32 extended */ -#define ESP_CONFIG2_MAGIC 0xe0 /* Invalid bits... */ - -/* ESP config register 3 read-write, found only esp236+fas236+fas100a+hme chips */ -#define ESP_CONFIG3_FCLOCK 0x01 /* FAST SCSI clock rate (esp100a/hme) */ -#define ESP_CONFIG3_TEM 0x01 /* Enable thresh-8 mode (esp/fas236) */ -#define ESP_CONFIG3_FAST 0x02 /* Enable FAST SCSI (esp100a/hme) */ -#define ESP_CONFIG3_ADMA 0x02 /* Enable alternate-dma (esp/fas236) */ -#define ESP_CONFIG3_TENB 0x04 /* group2 SCSI2 support (esp100a/hme) */ -#define ESP_CONFIG3_SRB 0x04 /* Save residual byte (esp/fas236) */ -#define ESP_CONFIG3_TMS 0x08 /* Three-byte msg's ok (esp100a/hme) */ -#define ESP_CONFIG3_FCLK 0x08 /* Fast SCSI clock rate (esp/fas236) */ -#define ESP_CONFIG3_IDMSG 0x10 /* ID message checking (esp100a/hme) */ -#define ESP_CONFIG3_FSCSI 0x10 /* Enable FAST SCSI (esp/fas236) */ -#define ESP_CONFIG3_GTM 0x20 /* group2 SCSI2 support (esp/fas236) */ -#define ESP_CONFIG3_IDBIT3 0x20 /* Bit 3 of HME SCSI-ID (hme) */ -#define ESP_CONFIG3_TBMS 0x40 /* Three-byte msg's ok (esp/fas236) */ -#define ESP_CONFIG3_EWIDE 0x40 /* Enable Wide-SCSI (hme) */ -#define ESP_CONFIG3_IMS 0x80 /* ID msg chk'ng (esp/fas236) */ -#define ESP_CONFIG3_OBPUSH 0x80 /* Push odd-byte to dma (hme) */ - -/* ESP command register read-write */ -/* Group 1 commands: These may be sent at any point in time to the ESP - * chip. None of them can generate interrupts 'cept - * the "SCSI bus reset" command if you have not disabled - * SCSI reset interrupts in the config1 ESP register. - */ -#define ESP_CMD_NULL 0x00 /* Null command, ie. 
a nop */ -#define ESP_CMD_FLUSH 0x01 /* FIFO Flush */ -#define ESP_CMD_RC 0x02 /* Chip reset */ -#define ESP_CMD_RS 0x03 /* SCSI bus reset */ - -/* Group 2 commands: ESP must be an initiator and connected to a target - * for these commands to work. - */ -#define ESP_CMD_TI 0x10 /* Transfer Information */ -#define ESP_CMD_ICCSEQ 0x11 /* Initiator cmd complete sequence */ -#define ESP_CMD_MOK 0x12 /* Message okie-dokie */ -#define ESP_CMD_TPAD 0x18 /* Transfer Pad */ -#define ESP_CMD_SATN 0x1a /* Set ATN */ -#define ESP_CMD_RATN 0x1b /* De-assert ATN */ - -/* Group 3 commands: ESP must be in the MSGOUT or MSGIN state and be connected - * to a target as the initiator for these commands to work. - */ -#define ESP_CMD_SMSG 0x20 /* Send message */ -#define ESP_CMD_SSTAT 0x21 /* Send status */ -#define ESP_CMD_SDATA 0x22 /* Send data */ -#define ESP_CMD_DSEQ 0x23 /* Discontinue Sequence */ -#define ESP_CMD_TSEQ 0x24 /* Terminate Sequence */ -#define ESP_CMD_TCCSEQ 0x25 /* Target cmd cmplt sequence */ -#define ESP_CMD_DCNCT 0x27 /* Disconnect */ -#define ESP_CMD_RMSG 0x28 /* Receive Message */ -#define ESP_CMD_RCMD 0x29 /* Receive Command */ -#define ESP_CMD_RDATA 0x2a /* Receive Data */ -#define ESP_CMD_RCSEQ 0x2b /* Receive cmd sequence */ - -/* Group 4 commands: The ESP must be in the disconnected state and must - * not be connected to any targets as initiator for - * these commands to work. - */ -#define ESP_CMD_RSEL 0x40 /* Reselect */ -#define ESP_CMD_SEL 0x41 /* Select w/o ATN */ -#define ESP_CMD_SELA 0x42 /* Select w/ATN */ -#define ESP_CMD_SELAS 0x43 /* Select w/ATN & STOP */ -#define ESP_CMD_ESEL 0x44 /* Enable selection */ -#define ESP_CMD_DSEL 0x45 /* Disable selections */ -#define ESP_CMD_SA3 0x46 /* Select w/ATN3 */ -#define ESP_CMD_RSEL3 0x47 /* Reselect3 */ - -/* This bit enables the ESP's DMA on the SBus */ -#define ESP_CMD_DMA 0x80 /* Do DMA? */ - - -/* ESP status register read-only */ -#define ESP_STAT_PIO 0x01 /* IO phase bit */ -#define ESP_STAT_PCD 0x02 /* CD phase bit */ -#define ESP_STAT_PMSG 0x04 /* MSG phase bit */ -#define ESP_STAT_PMASK 0x07 /* Mask of phase bits */ -#define ESP_STAT_TDONE 0x08 /* Transfer Completed */ -#define ESP_STAT_TCNT 0x10 /* Transfer Counter Is Zero */ -#define ESP_STAT_PERR 0x20 /* Parity error */ -#define ESP_STAT_SPAM 0x40 /* Real bad error */ -/* This indicates the 'interrupt pending' condition on esp236, it is a reserved - * bit on other revs of the ESP. - */ -#define ESP_STAT_INTR 0x80 /* Interrupt */ - -/* HME only: status 2 register */ -#define ESP_STAT2_SCHBIT 0x01 /* Upper bits 3-7 of sstep enabled */ -#define ESP_STAT2_FFLAGS 0x02 /* The fifo flags are now latched */ -#define ESP_STAT2_XCNT 0x04 /* The transfer counter is latched */ -#define ESP_STAT2_CREGA 0x08 /* The command reg is active now */ -#define ESP_STAT2_WIDE 0x10 /* Interface on this adapter is wide */ -#define ESP_STAT2_F1BYTE 0x20 /* There is one byte at top of fifo */ -#define ESP_STAT2_FMSB 0x40 /* Next byte in fifo is most significant */ -#define ESP_STAT2_FEMPTY 0x80 /* FIFO is empty */ - -/* The status register can be masked with ESP_STAT_PMASK and compared - * with the following values to determine the current phase the ESP - * (at least thinks it) is in. For our purposes we also add our own - * software 'done' bit for our phase management engine. 
- */ -#define ESP_DOP (0) /* Data Out */ -#define ESP_DIP (ESP_STAT_PIO) /* Data In */ -#define ESP_CMDP (ESP_STAT_PCD) /* Command */ -#define ESP_STATP (ESP_STAT_PCD|ESP_STAT_PIO) /* Status */ -#define ESP_MOP (ESP_STAT_PMSG|ESP_STAT_PCD) /* Message Out */ -#define ESP_MIP (ESP_STAT_PMSG|ESP_STAT_PCD|ESP_STAT_PIO) /* Message In */ - -/* ESP interrupt register read-only */ -#define ESP_INTR_S 0x01 /* Select w/o ATN */ -#define ESP_INTR_SATN 0x02 /* Select w/ATN */ -#define ESP_INTR_RSEL 0x04 /* Reselected */ -#define ESP_INTR_FDONE 0x08 /* Function done */ -#define ESP_INTR_BSERV 0x10 /* Bus service */ -#define ESP_INTR_DC 0x20 /* Disconnect */ -#define ESP_INTR_IC 0x40 /* Illegal command given */ -#define ESP_INTR_SR 0x80 /* SCSI bus reset detected */ - -/* Interrupt status macros */ -#define ESP_SRESET_IRQ(esp) ((esp)->intreg & (ESP_INTR_SR)) -#define ESP_ILLCMD_IRQ(esp) ((esp)->intreg & (ESP_INTR_IC)) -#define ESP_SELECT_WITH_ATN_IRQ(esp) ((esp)->intreg & (ESP_INTR_SATN)) -#define ESP_SELECT_WITHOUT_ATN_IRQ(esp) ((esp)->intreg & (ESP_INTR_S)) -#define ESP_SELECTION_IRQ(esp) ((ESP_SELECT_WITH_ATN_IRQ(esp)) || \ - (ESP_SELECT_WITHOUT_ATN_IRQ(esp))) -#define ESP_RESELECTION_IRQ(esp) ((esp)->intreg & (ESP_INTR_RSEL)) - -/* ESP sequence step register read-only */ -#define ESP_STEP_VBITS 0x07 /* Valid bits */ -#define ESP_STEP_ASEL 0x00 /* Selection&Arbitrate cmplt */ -#define ESP_STEP_SID 0x01 /* One msg byte sent */ -#define ESP_STEP_NCMD 0x02 /* Was not in command phase */ -#define ESP_STEP_PPC 0x03 /* Early phase chg caused cmnd - * bytes to be lost - */ -#define ESP_STEP_FINI4 0x04 /* Command was sent ok */ - -/* Ho hum, some ESP's set the step register to this as well... */ -#define ESP_STEP_FINI5 0x05 -#define ESP_STEP_FINI6 0x06 -#define ESP_STEP_FINI7 0x07 - -/* ESP chip-test register read-write */ -#define ESP_TEST_TARG 0x01 /* Target test mode */ -#define ESP_TEST_INI 0x02 /* Initiator test mode */ -#define ESP_TEST_TS 0x04 /* Tristate test mode */ - -/* ESP unique ID register read-only, found on fas236+fas100a only */ -#define ESP_UID_F100A 0x00 /* ESP FAS100A */ -#define ESP_UID_F236 0x02 /* ESP FAS236 */ -#define ESP_UID_REV 0x07 /* ESP revision */ -#define ESP_UID_FAM 0xf8 /* ESP family */ - -/* ESP fifo flags register read-only */ -/* Note that the following implies a 16 byte FIFO on the ESP. */ -#define ESP_FF_FBYTES 0x1f /* Num bytes in FIFO */ -#define ESP_FF_ONOTZERO 0x20 /* offset ctr not zero (esp100) */ -#define ESP_FF_SSTEP 0xe0 /* Sequence step */ - -/* ESP clock conversion factor register write-only */ -#define ESP_CCF_F0 0x00 /* 35.01MHz - 40MHz */ -#define ESP_CCF_NEVER 0x01 /* Set it to this and die */ -#define ESP_CCF_F2 0x02 /* 10MHz */ -#define ESP_CCF_F3 0x03 /* 10.01MHz - 15MHz */ -#define ESP_CCF_F4 0x04 /* 15.01MHz - 20MHz */ -#define ESP_CCF_F5 0x05 /* 20.01MHz - 25MHz */ -#define ESP_CCF_F6 0x06 /* 25.01MHz - 30MHz */ -#define ESP_CCF_F7 0x07 /* 30.01MHz - 35MHz */ - -/* HME only... 
*/ -#define ESP_BUSID_RESELID 0x10 -#define ESP_BUSID_CTR32BIT 0x40 - -#define ESP_BUS_TIMEOUT 275 /* In milli-seconds */ -#define ESP_TIMEO_CONST 8192 -#define ESP_NEG_DEFP(mhz, cfact) \ - ((ESP_BUS_TIMEOUT * ((mhz) / 1000)) / (8192 * (cfact))) -#define ESP_MHZ_TO_CYCLE(mhertz) ((1000000000) / ((mhertz) / 1000)) -#define ESP_TICK(ccf, cycle) ((7682 * (ccf) * (cycle) / 1000)) - -#endif /* !(_SPARC_ESP_H) */ diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c new file mode 100644 index 00000000000..3cd5bf723da --- /dev/null +++ b/drivers/scsi/esp_scsi.c @@ -0,0 +1,2710 @@ +/* esp_scsi.c: ESP SCSI driver. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/list.h> +#include <linux/completion.h> +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/init.h> + +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/dma.h> + +#include <scsi/scsi.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_tcq.h> +#include <scsi/scsi_dbg.h> +#include <scsi/scsi_transport_spi.h> + +#include "esp_scsi.h" + +#define DRV_MODULE_NAME "esp" +#define PFX DRV_MODULE_NAME ": " +#define DRV_VERSION "2.000" +#define DRV_MODULE_RELDATE "April 19, 2007" + +/* SCSI bus reset settle time in seconds. */ +static int esp_bus_reset_settle = 3; + +static u32 esp_debug; +#define ESP_DEBUG_INTR 0x00000001 +#define ESP_DEBUG_SCSICMD 0x00000002 +#define ESP_DEBUG_RESET 0x00000004 +#define ESP_DEBUG_MSGIN 0x00000008 +#define ESP_DEBUG_MSGOUT 0x00000010 +#define ESP_DEBUG_CMDDONE 0x00000020 +#define ESP_DEBUG_DISCONNECT 0x00000040 +#define ESP_DEBUG_DATASTART 0x00000080 +#define ESP_DEBUG_DATADONE 0x00000100 +#define ESP_DEBUG_RECONNECT 0x00000200 +#define ESP_DEBUG_AUTOSENSE 0x00000400 + +#define esp_log_intr(f, a...) \ +do { if (esp_debug & ESP_DEBUG_INTR) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_reset(f, a...) \ +do { if (esp_debug & ESP_DEBUG_RESET) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_msgin(f, a...) \ +do { if (esp_debug & ESP_DEBUG_MSGIN) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_msgout(f, a...) \ +do { if (esp_debug & ESP_DEBUG_MSGOUT) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_cmddone(f, a...) \ +do { if (esp_debug & ESP_DEBUG_CMDDONE) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_disconnect(f, a...) \ +do { if (esp_debug & ESP_DEBUG_DISCONNECT) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_datastart(f, a...) \ +do { if (esp_debug & ESP_DEBUG_DATASTART) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_datadone(f, a...) \ +do { if (esp_debug & ESP_DEBUG_DATADONE) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_reconnect(f, a...) \ +do { if (esp_debug & ESP_DEBUG_RECONNECT) \ + printk(f, ## a); \ +} while (0) + +#define esp_log_autosense(f, a...) 
\ +do { if (esp_debug & ESP_DEBUG_AUTOSENSE) \ + printk(f, ## a); \ +} while (0) + +#define esp_read8(REG) esp->ops->esp_read8(esp, REG) +#define esp_write8(VAL,REG) esp->ops->esp_write8(esp, VAL, REG) + +static void esp_log_fill_regs(struct esp *esp, + struct esp_event_ent *p) +{ + p->sreg = esp->sreg; + p->seqreg = esp->seqreg; + p->sreg2 = esp->sreg2; + p->ireg = esp->ireg; + p->select_state = esp->select_state; + p->event = esp->event; +} + +void scsi_esp_cmd(struct esp *esp, u8 val) +{ + struct esp_event_ent *p; + int idx = esp->esp_event_cur; + + p = &esp->esp_event_log[idx]; + p->type = ESP_EVENT_TYPE_CMD; + p->val = val; + esp_log_fill_regs(esp, p); + + esp->esp_event_cur = (idx + 1) & (ESP_EVENT_LOG_SZ - 1); + + esp_write8(val, ESP_CMD); +} +EXPORT_SYMBOL(scsi_esp_cmd); + +static void esp_event(struct esp *esp, u8 val) +{ + struct esp_event_ent *p; + int idx = esp->esp_event_cur; + + p = &esp->esp_event_log[idx]; + p->type = ESP_EVENT_TYPE_EVENT; + p->val = val; + esp_log_fill_regs(esp, p); + + esp->esp_event_cur = (idx + 1) & (ESP_EVENT_LOG_SZ - 1); + + esp->event = val; +} + +static void esp_dump_cmd_log(struct esp *esp) +{ + int idx = esp->esp_event_cur; + int stop = idx; + + printk(KERN_INFO PFX "esp%d: Dumping command log\n", + esp->host->unique_id); + do { + struct esp_event_ent *p = &esp->esp_event_log[idx]; + + printk(KERN_INFO PFX "esp%d: ent[%d] %s ", + esp->host->unique_id, idx, + p->type == ESP_EVENT_TYPE_CMD ? "CMD" : "EVENT"); + + printk("val[%02x] sreg[%02x] seqreg[%02x] " + "sreg2[%02x] ireg[%02x] ss[%02x] event[%02x]\n", + p->val, p->sreg, p->seqreg, + p->sreg2, p->ireg, p->select_state, p->event); + + idx = (idx + 1) & (ESP_EVENT_LOG_SZ - 1); + } while (idx != stop); +} + +static void esp_flush_fifo(struct esp *esp) +{ + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + if (esp->rev == ESP236) { + int lim = 1000; + + while (esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES) { + if (--lim == 0) { + printk(KERN_ALERT PFX "esp%d: ESP_FF_BYTES " + "will not clear!\n", + esp->host->unique_id); + break; + } + udelay(1); + } + } +} + +static void hme_read_fifo(struct esp *esp) +{ + int fcnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES; + int idx = 0; + + while (fcnt--) { + esp->fifo[idx++] = esp_read8(ESP_FDATA); + esp->fifo[idx++] = esp_read8(ESP_FDATA); + } + if (esp->sreg2 & ESP_STAT2_F1BYTE) { + esp_write8(0, ESP_FDATA); + esp->fifo[idx++] = esp_read8(ESP_FDATA); + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + } + esp->fifo_cnt = idx; +} + +static void esp_set_all_config3(struct esp *esp, u8 val) +{ + int i; + + for (i = 0; i < ESP_MAX_TARGET; i++) + esp->target[i].esp_config3 = val; +} + +/* Reset the ESP chip, _not_ the SCSI bus. */ +static void esp_reset_esp(struct esp *esp) +{ + u8 family_code, version; + + /* Now reset the ESP chip */ + scsi_esp_cmd(esp, ESP_CMD_RC); + scsi_esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA); + scsi_esp_cmd(esp, ESP_CMD_NULL | ESP_CMD_DMA); + + /* Reload the configuration registers */ + esp_write8(esp->cfact, ESP_CFACT); + + esp->prev_stp = 0; + esp_write8(esp->prev_stp, ESP_STP); + + esp->prev_soff = 0; + esp_write8(esp->prev_soff, ESP_SOFF); + + esp_write8(esp->neg_defp, ESP_TIMEO); + + /* This is the only point at which it is reliable to read + * the ID-code for the fast ESP chip variants. + */ + esp->max_period = ((35 * esp->ccycle) / 1000); + if (esp->rev == FAST) { + version = esp_read8(ESP_UID); + family_code = (version & 0xf8) >> 3; + if (family_code == 0x02) + esp->rev = FAS236; + else if (family_code == 0x0a) + esp->rev = FASHME; /* Version is usually '5'. 
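scsi_esp_cmd() and esp_event() above log every chip command and driver state change into esp_event_log, advancing esp_event_cur with a power-of-two mask so the log wraps in place; esp_dump_cmd_log() replays it starting at the cursor, which is the oldest slot once the log has wrapped. A minimal standalone sketch of the same masked ring-log technique (size and fields illustrative, not the driver's):

#include <stdio.h>

#define LOG_SZ 16	/* must be a power of two for the mask to work */

struct ent { unsigned char type, val; };

static struct ent ring[LOG_SZ];
static int cur;

static void ring_put(unsigned char type, unsigned char val)
{
	ring[cur].type = type;
	ring[cur].val = val;
	/* ANDing with LOG_SZ - 1 wraps the cursor without a divide. */
	cur = (cur + 1) & (LOG_SZ - 1);
}

static void ring_dump(void)
{
	int idx = cur, stop = cur;

	do {	/* the cursor is the oldest entry after a wrap */
		printf("ent[%d] type[%u] val[%02x]\n",
		       idx, ring[idx].type, ring[idx].val);
		idx = (idx + 1) & (LOG_SZ - 1);
	} while (idx != stop);
}

int main(void)
{
	for (int i = 0; i < 20; i++)	/* overfill to force a wrap */
		ring_put(0, (unsigned char)i);
	ring_dump();			/* prints values 4..19, oldest first */
	return 0;
}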
*/ + else + esp->rev = FAS100A; + esp->min_period = ((4 * esp->ccycle) / 1000); + } else { + esp->min_period = ((5 * esp->ccycle) / 1000); + } + esp->max_period = (esp->max_period + 3)>>2; + esp->min_period = (esp->min_period + 3)>>2; + + esp_write8(esp->config1, ESP_CFG1); + switch (esp->rev) { + case ESP100: + /* nothing to do */ + break; + + case ESP100A: + esp_write8(esp->config2, ESP_CFG2); + break; + + case ESP236: + /* Slow 236 */ + esp_write8(esp->config2, ESP_CFG2); + esp->prev_cfg3 = esp->target[0].esp_config3; + esp_write8(esp->prev_cfg3, ESP_CFG3); + break; + + case FASHME: + esp->config2 |= (ESP_CONFIG2_HME32 | ESP_CONFIG2_HMEFENAB); + /* fallthrough... */ + + case FAS236: + /* Fast 236 or HME */ + esp_write8(esp->config2, ESP_CFG2); + if (esp->rev == FASHME) { + u8 cfg3 = esp->target[0].esp_config3; + + cfg3 |= ESP_CONFIG3_FCLOCK | ESP_CONFIG3_OBPUSH; + if (esp->scsi_id >= 8) + cfg3 |= ESP_CONFIG3_IDBIT3; + esp_set_all_config3(esp, cfg3); + } else { + u32 cfg3 = esp->target[0].esp_config3; + + cfg3 |= ESP_CONFIG3_FCLK; + esp_set_all_config3(esp, cfg3); + } + esp->prev_cfg3 = esp->target[0].esp_config3; + esp_write8(esp->prev_cfg3, ESP_CFG3); + if (esp->rev == FASHME) { + esp->radelay = 80; + } else { + if (esp->flags & ESP_FLAG_DIFFERENTIAL) + esp->radelay = 0; + else + esp->radelay = 96; + } + break; + + case FAS100A: + /* Fast 100a */ + esp_write8(esp->config2, ESP_CFG2); + esp_set_all_config3(esp, + (esp->target[0].esp_config3 | + ESP_CONFIG3_FCLOCK)); + esp->prev_cfg3 = esp->target[0].esp_config3; + esp_write8(esp->prev_cfg3, ESP_CFG3); + esp->radelay = 32; + break; + + default: + break; + } + + /* Eat any bitrot in the chip */ + esp_read8(ESP_INTRPT); + udelay(100); +} + +static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd) +{ + struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); + struct scatterlist *sg = cmd->request_buffer; + int dir = cmd->sc_data_direction; + int total, i; + + if (dir == DMA_NONE) + return; + + BUG_ON(cmd->use_sg == 0); + + spriv->u.num_sg = esp->ops->map_sg(esp, sg, + cmd->use_sg, dir); + spriv->cur_residue = sg_dma_len(sg); + spriv->cur_sg = sg; + + total = 0; + for (i = 0; i < spriv->u.num_sg; i++) + total += sg_dma_len(&sg[i]); + spriv->tot_residue = total; +} + +static dma_addr_t esp_cur_dma_addr(struct esp_cmd_entry *ent, + struct scsi_cmnd *cmd) +{ + struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd); + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + return ent->sense_dma + + (ent->sense_ptr - cmd->sense_buffer); + } + + return sg_dma_address(p->cur_sg) + + (sg_dma_len(p->cur_sg) - + p->cur_residue); +} + +static unsigned int esp_cur_dma_len(struct esp_cmd_entry *ent, + struct scsi_cmnd *cmd) +{ + struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd); + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + return SCSI_SENSE_BUFFERSIZE - + (ent->sense_ptr - cmd->sense_buffer); + } + return p->cur_residue; +} + +static void esp_advance_dma(struct esp *esp, struct esp_cmd_entry *ent, + struct scsi_cmnd *cmd, unsigned int len) +{ + struct esp_cmd_priv *p = ESP_CMD_PRIV(cmd); + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + ent->sense_ptr += len; + return; + } + + p->cur_residue -= len; + p->tot_residue -= len; + if (p->cur_residue < 0 || p->tot_residue < 0) { + printk(KERN_ERR PFX "esp%d: Data transfer overflow.\n", + esp->host->unique_id); + printk(KERN_ERR PFX "esp%d: cur_residue[%d] tot_residue[%d] " + "len[%u]\n", + esp->host->unique_id, + p->cur_residue, p->tot_residue, len); + p->cur_residue = 0; + p->tot_residue = 0; + } + if (!p->cur_residue && 
p->tot_residue) { + p->cur_sg++; + p->cur_residue = sg_dma_len(p->cur_sg); + } +} + +static void esp_unmap_dma(struct esp *esp, struct scsi_cmnd *cmd) +{ + struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); + int dir = cmd->sc_data_direction; + + if (dir == DMA_NONE) + return; + + esp->ops->unmap_sg(esp, cmd->request_buffer, + spriv->u.num_sg, dir); +} + +static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent) +{ + struct scsi_cmnd *cmd = ent->cmd; + struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + ent->saved_sense_ptr = ent->sense_ptr; + return; + } + ent->saved_cur_residue = spriv->cur_residue; + ent->saved_cur_sg = spriv->cur_sg; + ent->saved_tot_residue = spriv->tot_residue; +} + +static void esp_restore_pointers(struct esp *esp, struct esp_cmd_entry *ent) +{ + struct scsi_cmnd *cmd = ent->cmd; + struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + ent->sense_ptr = ent->saved_sense_ptr; + return; + } + spriv->cur_residue = ent->saved_cur_residue; + spriv->cur_sg = ent->saved_cur_sg; + spriv->tot_residue = ent->saved_tot_residue; +} + +static void esp_check_command_len(struct esp *esp, struct scsi_cmnd *cmd) +{ + if (cmd->cmd_len == 6 || + cmd->cmd_len == 10 || + cmd->cmd_len == 12) { + esp->flags &= ~ESP_FLAG_DOING_SLOWCMD; + } else { + esp->flags |= ESP_FLAG_DOING_SLOWCMD; + } +} + +static void esp_write_tgt_config3(struct esp *esp, int tgt) +{ + if (esp->rev > ESP100A) { + u8 val = esp->target[tgt].esp_config3; + + if (val != esp->prev_cfg3) { + esp->prev_cfg3 = val; + esp_write8(val, ESP_CFG3); + } + } +} + +static void esp_write_tgt_sync(struct esp *esp, int tgt) +{ + u8 off = esp->target[tgt].esp_offset; + u8 per = esp->target[tgt].esp_period; + + if (off != esp->prev_soff) { + esp->prev_soff = off; + esp_write8(off, ESP_SOFF); + } + if (per != esp->prev_stp) { + esp->prev_stp = per; + esp_write8(per, ESP_STP); + } +} + +static u32 esp_dma_length_limit(struct esp *esp, u32 dma_addr, u32 dma_len) +{ + if (esp->rev == FASHME) { + /* Arbitrary segment boundaries, 24-bit counts. */ + if (dma_len > (1U << 24)) + dma_len = (1U << 24); + } else { + u32 base, end; + + /* ESP chip limits other variants by 16-bits of transfer + * count. Actually on FAS100A and FAS236 we could get + * 24-bits of transfer count by enabling ESP_CONFIG2_FENAB + * in the ESP_CFG2 register but that causes other unwanted + * changes so we don't use it currently. + */ + if (dma_len > (1U << 16)) + dma_len = (1U << 16); + + /* All of the DMA variants hooked up to these chips + * cannot handle crossing a 24-bit address boundary. + */ + base = dma_addr & ((1U << 24) - 1U); + end = base + dma_len; + if (end > (1U << 24)) + end = (1U << 24); + dma_len = end - base; + } + return dma_len; +} + +static int esp_need_to_nego_wide(struct esp_target_data *tp) +{ + struct scsi_target *target = tp->starget; + + return spi_width(target) != tp->nego_goal_width; +} + +static int esp_need_to_nego_sync(struct esp_target_data *tp) +{ + struct scsi_target *target = tp->starget; + + /* When offset is zero, period is "don't care". */ + if (!spi_offset(target) && !tp->nego_goal_offset) + return 0; + + if (spi_offset(target) == tp->nego_goal_offset && + spi_period(target) == tp->nego_goal_period) + return 0; + + return 1; +} + +static int esp_alloc_lun_tag(struct esp_cmd_entry *ent, + struct esp_lun_data *lp) +{ + if (!ent->tag[0]) { + /* Non-tagged, slot already taken? 
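For context on esp_dma_length_limit() above: pre-HME chips carry only 16 bits of transfer count, and none of the attached DMA engines can cross a 24-bit address boundary, so a request is trimmed to whichever limit bites first. A standalone sketch of that clamping arithmetic (simplified; not the driver function itself):

#include <stdint.h>
#include <stdio.h>

static uint32_t dma_limit(uint32_t addr, uint32_t len)
{
	uint32_t base, end;

	if (len > (1U << 16))
		len = 1U << 16;			/* 16-bit transfer counter */

	base = addr & ((1U << 24) - 1U);	/* offset inside 16MB window */
	end = base + len;
	if (end > (1U << 24))
		end = 1U << 24;			/* stop at the 24-bit boundary */
	return end - base;
}

int main(void)
{
	/* 0x00fffff0 sits 16 bytes below a 16MB boundary: the 4096-byte
	 * request is clamped to 16.
	 */
	printf("%u\n", (unsigned)dma_limit(0x00fffff0, 4096));
	return 0;
}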
*/ + if (lp->non_tagged_cmd) + return -EBUSY; + + if (lp->hold) { + /* We are being held by active tagged + * commands. + */ + if (lp->num_tagged) + return -EBUSY; + + /* Tagged commands completed, we can unplug + * the queue and run this untagged command. + */ + lp->hold = 0; + } else if (lp->num_tagged) { + /* Plug the queue until num_tagged decreases + * to zero in esp_free_lun_tag. + */ + lp->hold = 1; + return -EBUSY; + } + + lp->non_tagged_cmd = ent; + return 0; + } else { + /* Tagged command, see if blocked by a + * non-tagged one. + */ + if (lp->non_tagged_cmd || lp->hold) + return -EBUSY; + } + + BUG_ON(lp->tagged_cmds[ent->tag[1]]); + + lp->tagged_cmds[ent->tag[1]] = ent; + lp->num_tagged++; + + return 0; +} + +static void esp_free_lun_tag(struct esp_cmd_entry *ent, + struct esp_lun_data *lp) +{ + if (ent->tag[0]) { + BUG_ON(lp->tagged_cmds[ent->tag[1]] != ent); + lp->tagged_cmds[ent->tag[1]] = NULL; + lp->num_tagged--; + } else { + BUG_ON(lp->non_tagged_cmd != ent); + lp->non_tagged_cmd = NULL; + } +} + +/* When a contingent allegiance condition is created, we force feed a + * REQUEST_SENSE command to the device to fetch the sense data. I + * tried many other schemes, relying on the scsi error handling layer + * to send out the REQUEST_SENSE automatically, but this was difficult + * to get right especially in the presence of applications like smartd + * which use SG_IO to send out their own REQUEST_SENSE commands. + */ +static void esp_autosense(struct esp *esp, struct esp_cmd_entry *ent) +{ + struct scsi_cmnd *cmd = ent->cmd; + struct scsi_device *dev = cmd->device; + int tgt, lun; + u8 *p, val; + + tgt = dev->id; + lun = dev->lun; + + + if (!ent->sense_ptr) { + esp_log_autosense("esp%d: Doing auto-sense for " + "tgt[%d] lun[%d]\n", + esp->host->unique_id, tgt, lun); + + ent->sense_ptr = cmd->sense_buffer; + ent->sense_dma = esp->ops->map_single(esp, + ent->sense_ptr, + SCSI_SENSE_BUFFERSIZE, + DMA_FROM_DEVICE); + } + ent->saved_sense_ptr = ent->sense_ptr; + + esp->active_cmd = ent; + + p = esp->command_block; + esp->msg_out_len = 0; + + *p++ = IDENTIFY(0, lun); + *p++ = REQUEST_SENSE; + *p++ = ((dev->scsi_level <= SCSI_2) ? 
+ (lun << 5) : 0); + *p++ = 0; + *p++ = 0; + *p++ = SCSI_SENSE_BUFFERSIZE; + *p++ = 0; + + esp->select_state = ESP_SELECT_BASIC; + + val = tgt; + if (esp->rev == FASHME) + val |= ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT; + esp_write8(val, ESP_BUSID); + + esp_write_tgt_sync(esp, tgt); + esp_write_tgt_config3(esp, tgt); + + val = (p - esp->command_block); + + if (esp->rev == FASHME) + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + esp->ops->send_dma_cmd(esp, esp->command_block_dma, + val, 16, 0, ESP_CMD_DMA | ESP_CMD_SELA); +} + +static struct esp_cmd_entry *find_and_prep_issuable_command(struct esp *esp) +{ + struct esp_cmd_entry *ent; + + list_for_each_entry(ent, &esp->queued_cmds, list) { + struct scsi_cmnd *cmd = ent->cmd; + struct scsi_device *dev = cmd->device; + struct esp_lun_data *lp = dev->hostdata; + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + ent->tag[0] = 0; + ent->tag[1] = 0; + return ent; + } + + if (!scsi_populate_tag_msg(cmd, &ent->tag[0])) { + ent->tag[0] = 0; + ent->tag[1] = 0; + } + + if (esp_alloc_lun_tag(ent, lp) < 0) + continue; + + return ent; + } + + return NULL; +} + +static void esp_maybe_execute_command(struct esp *esp) +{ + struct esp_target_data *tp; + struct esp_lun_data *lp; + struct scsi_device *dev; + struct scsi_cmnd *cmd; + struct esp_cmd_entry *ent; + int tgt, lun, i; + u32 val, start_cmd; + u8 *p; + + if (esp->active_cmd || + (esp->flags & ESP_FLAG_RESETTING)) + return; + + ent = find_and_prep_issuable_command(esp); + if (!ent) + return; + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + esp_autosense(esp, ent); + return; + } + + cmd = ent->cmd; + dev = cmd->device; + tgt = dev->id; + lun = dev->lun; + tp = &esp->target[tgt]; + lp = dev->hostdata; + + list_del(&ent->list); + list_add(&ent->list, &esp->active_cmds); + + esp->active_cmd = ent; + + esp_map_dma(esp, cmd); + esp_save_pointers(esp, ent); + + esp_check_command_len(esp, cmd); + + p = esp->command_block; + + esp->msg_out_len = 0; + if (tp->flags & ESP_TGT_CHECK_NEGO) { + /* Need to negotiate. If the target is broken + * go for synchronous transfers and non-wide. + */ + if (tp->flags & ESP_TGT_BROKEN) { + tp->flags &= ~ESP_TGT_DISCONNECT; + tp->nego_goal_period = 0; + tp->nego_goal_offset = 0; + tp->nego_goal_width = 0; + tp->nego_goal_tags = 0; + } + + /* If the settings are not changing, skip this. */ + if (spi_width(tp->starget) == tp->nego_goal_width && + spi_period(tp->starget) == tp->nego_goal_period && + spi_offset(tp->starget) == tp->nego_goal_offset) { + tp->flags &= ~ESP_TGT_CHECK_NEGO; + goto build_identify; + } + + if (esp->rev == FASHME && esp_need_to_nego_wide(tp)) { + esp->msg_out_len = + spi_populate_width_msg(&esp->msg_out[0], + (tp->nego_goal_width ? + 1 : 0)); + tp->flags |= ESP_TGT_NEGO_WIDE; + } else if (esp_need_to_nego_sync(tp)) { + esp->msg_out_len = + spi_populate_sync_msg(&esp->msg_out[0], + tp->nego_goal_period, + tp->nego_goal_offset); + tp->flags |= ESP_TGT_NEGO_SYNC; + } else { + tp->flags &= ~ESP_TGT_CHECK_NEGO; + } + + /* Process it like a slow command. */ + if (tp->flags & (ESP_TGT_NEGO_WIDE | ESP_TGT_NEGO_SYNC)) + esp->flags |= ESP_FLAG_DOING_SLOWCMD; + } + +build_identify: + /* If we don't have a lun-data struct yet, we're probing + * so do not disconnect. Also, do not disconnect unless + * we have a tag on this command. + */ + if (lp && (tp->flags & ESP_TGT_DISCONNECT) && ent->tag[0]) + *p++ = IDENTIFY(1, lun); + else + *p++ = IDENTIFY(0, lun); + + if (ent->tag[0] && esp->rev == ESP100) { + /* ESP100 lacks select w/atn3 command, use select + * and stop instead. 
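find_and_prep_issuable_command() above skips any command for which esp_alloc_lun_tag() returns -EBUSY, and the hold/num_tagged handshake behind that refusal is worth spelling out: an untagged command cannot share the LUN with tagged ones, so it first "plugs" the queue (lp->hold = 1), waits for the tagged commands to drain, then runs alone. A reduced standalone model of the handshake (bookkeeping simplified; field names echo the driver's):

#include <stdbool.h>
#include <stdio.h>

struct lun { int num_tagged; bool hold; bool untagged_busy; };

/* Returns true when an untagged command may start now. */
static bool try_start_untagged(struct lun *lp)
{
	if (lp->untagged_busy)
		return false;
	if (lp->hold) {
		if (lp->num_tagged)
			return false;		/* still draining */
		lp->hold = false;		/* drained: unplug the queue */
	} else if (lp->num_tagged) {
		lp->hold = true;		/* plug the queue, retry later */
		return false;
	}
	lp->untagged_busy = true;
	return true;
}

int main(void)
{
	struct lun l = { .num_tagged = 2 };

	printf("%d\n", try_start_untagged(&l));	/* 0: plugs the queue */
	l.num_tagged = 0;			/* tagged commands complete */
	printf("%d\n", try_start_untagged(&l));	/* 1: runs alone */
	return 0;
}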
+ */ + esp->flags |= ESP_FLAG_DOING_SLOWCMD; + } + + if (!(esp->flags & ESP_FLAG_DOING_SLOWCMD)) { + start_cmd = ESP_CMD_DMA | ESP_CMD_SELA; + if (ent->tag[0]) { + *p++ = ent->tag[0]; + *p++ = ent->tag[1]; + + start_cmd = ESP_CMD_DMA | ESP_CMD_SA3; + } + + for (i = 0; i < cmd->cmd_len; i++) + *p++ = cmd->cmnd[i]; + + esp->select_state = ESP_SELECT_BASIC; + } else { + esp->cmd_bytes_left = cmd->cmd_len; + esp->cmd_bytes_ptr = &cmd->cmnd[0]; + + if (ent->tag[0]) { + for (i = esp->msg_out_len - 1; + i >= 0; i--) + esp->msg_out[i + 2] = esp->msg_out[i]; + esp->msg_out[0] = ent->tag[0]; + esp->msg_out[1] = ent->tag[1]; + esp->msg_out_len += 2; + } + + start_cmd = ESP_CMD_DMA | ESP_CMD_SELAS; + esp->select_state = ESP_SELECT_MSGOUT; + } + val = tgt; + if (esp->rev == FASHME) + val |= ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT; + esp_write8(val, ESP_BUSID); + + esp_write_tgt_sync(esp, tgt); + esp_write_tgt_config3(esp, tgt); + + val = (p - esp->command_block); + + if (esp_debug & ESP_DEBUG_SCSICMD) { + printk("ESP: tgt[%d] lun[%d] scsi_cmd [ ", tgt, lun); + for (i = 0; i < cmd->cmd_len; i++) + printk("%02x ", cmd->cmnd[i]); + printk("]\n"); + } + + if (esp->rev == FASHME) + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + esp->ops->send_dma_cmd(esp, esp->command_block_dma, + val, 16, 0, start_cmd); +} + +static struct esp_cmd_entry *esp_get_ent(struct esp *esp) +{ + struct list_head *head = &esp->esp_cmd_pool; + struct esp_cmd_entry *ret; + + if (list_empty(head)) { + ret = kzalloc(sizeof(struct esp_cmd_entry), GFP_ATOMIC); + } else { + ret = list_entry(head->next, struct esp_cmd_entry, list); + list_del(&ret->list); + memset(ret, 0, sizeof(*ret)); + } + return ret; +} + +static void esp_put_ent(struct esp *esp, struct esp_cmd_entry *ent) +{ + list_add(&ent->list, &esp->esp_cmd_pool); +} + +static void esp_cmd_is_done(struct esp *esp, struct esp_cmd_entry *ent, + struct scsi_cmnd *cmd, unsigned int result) +{ + struct scsi_device *dev = cmd->device; + int tgt = dev->id; + int lun = dev->lun; + + esp->active_cmd = NULL; + esp_unmap_dma(esp, cmd); + esp_free_lun_tag(ent, dev->hostdata); + cmd->result = result; + + if (ent->eh_done) { + complete(ent->eh_done); + ent->eh_done = NULL; + } + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + esp->ops->unmap_single(esp, ent->sense_dma, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + ent->sense_ptr = NULL; + + /* Restore the message/status bytes to what we actually + * saw originally. Also, report that we are providing + * the sense data. 
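The sense bytes that esp_cmd_is_done() reports above were fetched by the 6-byte REQUEST_SENSE CDB that esp_autosense() builds right after the IDENTIFY byte; note how byte 1 carries the LUN in its top three bits only for pre-SCSI-2 devices. A standalone sketch of that CDB layout (opcode value 0x03 and the level test inlined for illustration; not the driver code):

#include <stdio.h>

static int build_request_sense(unsigned char *p, int lun, int scsi_level,
			       int alloc_len)
{
	unsigned char *start = p;

	*p++ = 0x03;				/* REQUEST_SENSE opcode */
	*p++ = scsi_level <= 2 ? lun << 5 : 0;	/* old-style LUN in byte 1 */
	*p++ = 0;
	*p++ = 0;
	*p++ = alloc_len;			/* sense buffer size */
	*p++ = 0;				/* control byte */
	return p - start;
}

int main(void)
{
	unsigned char cdb[6];
	int len = build_request_sense(cdb, 2, 2, 96);

	printf("len=%d byte1=%02x\n", len, cdb[1]);	/* len=6 byte1=40 */
	return 0;
}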
+ */ + cmd->result = ((DRIVER_SENSE << 24) | + (DID_OK << 16) | + (COMMAND_COMPLETE << 8) | + (SAM_STAT_CHECK_CONDITION << 0)); + + ent->flags &= ~ESP_CMD_FLAG_AUTOSENSE; + if (esp_debug & ESP_DEBUG_AUTOSENSE) { + int i; + + printk("esp%d: tgt[%d] lun[%d] AUTO SENSE[ ", + esp->host->unique_id, tgt, lun); + for (i = 0; i < 18; i++) + printk("%02x ", cmd->sense_buffer[i]); + printk("]\n"); + } + } + + cmd->scsi_done(cmd); + + list_del(&ent->list); + esp_put_ent(esp, ent); + + esp_maybe_execute_command(esp); +} + +static unsigned int compose_result(unsigned int status, unsigned int message, + unsigned int driver_code) +{ + return (status | (message << 8) | (driver_code << 16)); +} + +static void esp_event_queue_full(struct esp *esp, struct esp_cmd_entry *ent) +{ + struct scsi_device *dev = ent->cmd->device; + struct esp_lun_data *lp = dev->hostdata; + + scsi_track_queue_full(dev, lp->num_tagged - 1); +} + +static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) +{ + struct scsi_device *dev = cmd->device; + struct esp *esp = host_to_esp(dev->host); + struct esp_cmd_priv *spriv; + struct esp_cmd_entry *ent; + + ent = esp_get_ent(esp); + if (!ent) + return SCSI_MLQUEUE_HOST_BUSY; + + ent->cmd = cmd; + + cmd->scsi_done = done; + + spriv = ESP_CMD_PRIV(cmd); + spriv->u.dma_addr = ~(dma_addr_t)0x0; + + list_add_tail(&ent->list, &esp->queued_cmds); + + esp_maybe_execute_command(esp); + + return 0; +} + +static int esp_check_gross_error(struct esp *esp) +{ + if (esp->sreg & ESP_STAT_SPAM) { + /* Gross Error, could be one of: + * - top of fifo overwritten + * - top of command register overwritten + * - DMA programmed with wrong direction + * - improper phase change + */ + printk(KERN_ERR PFX "esp%d: Gross error sreg[%02x]\n", + esp->host->unique_id, esp->sreg); + /* XXX Reset the chip. XXX */ + return 1; + } + return 0; +} + +static int esp_check_spur_intr(struct esp *esp) +{ + switch (esp->rev) { + case ESP100: + case ESP100A: + /* The interrupt pending bit of the status register cannot + * be trusted on these revisions. + */ + esp->sreg &= ~ESP_STAT_INTR; + break; + + default: + if (!(esp->sreg & ESP_STAT_INTR)) { + esp->ireg = esp_read8(ESP_INTRPT); + if (esp->ireg & ESP_INTR_SR) + return 1; + + /* If the DMA is indicating interrupt pending and the + * ESP is not, the only possibility is a DMA error. + */ + if (!esp->ops->dma_error(esp)) { + printk(KERN_ERR PFX "esp%d: Spurious irq, " + "sreg=%x.\n", + esp->host->unique_id, esp->sreg); + return -1; + } + + printk(KERN_ERR PFX "esp%d: DMA error\n", + esp->host->unique_id); + + /* XXX Reset the chip. XXX */ + return -1; + } + break; + } + + return 0; +} + +static void esp_schedule_reset(struct esp *esp) +{ + esp_log_reset("ESP: esp_schedule_reset() from %p\n", + __builtin_return_address(0)); + esp->flags |= ESP_FLAG_RESETTING; + esp_event(esp, ESP_EVENT_RESET); +} + +/* In order to avoid having to add a special half-reconnected state + * into the driver we just sit here and poll through the rest of + * the reselection process to get the tag message bytes. 
+ */ +static struct esp_cmd_entry *esp_reconnect_with_tag(struct esp *esp, + struct esp_lun_data *lp) +{ + struct esp_cmd_entry *ent; + int i; + + if (!lp->num_tagged) { + printk(KERN_ERR PFX "esp%d: Reconnect w/num_tagged==0\n", + esp->host->unique_id); + return NULL; + } + + esp_log_reconnect("ESP: reconnect tag, "); + + for (i = 0; i < ESP_QUICKIRQ_LIMIT; i++) { + if (esp->ops->irq_pending(esp)) + break; + } + if (i == ESP_QUICKIRQ_LIMIT) { + printk(KERN_ERR PFX "esp%d: Reconnect IRQ1 timeout\n", + esp->host->unique_id); + return NULL; + } + + esp->sreg = esp_read8(ESP_STATUS); + esp->ireg = esp_read8(ESP_INTRPT); + + esp_log_reconnect("IRQ(%d:%x:%x), ", + i, esp->ireg, esp->sreg); + + if (esp->ireg & ESP_INTR_DC) { + printk(KERN_ERR PFX "esp%d: Reconnect, got disconnect.\n", + esp->host->unique_id); + return NULL; + } + + if ((esp->sreg & ESP_STAT_PMASK) != ESP_MIP) { + printk(KERN_ERR PFX "esp%d: Reconnect, not MIP sreg[%02x].\n", + esp->host->unique_id, esp->sreg); + return NULL; + } + + /* DMA in the tag bytes... */ + esp->command_block[0] = 0xff; + esp->command_block[1] = 0xff; + esp->ops->send_dma_cmd(esp, esp->command_block_dma, + 2, 2, 1, ESP_CMD_DMA | ESP_CMD_TI); + + /* ACK the message. */ + scsi_esp_cmd(esp, ESP_CMD_MOK); + + for (i = 0; i < ESP_RESELECT_TAG_LIMIT; i++) { + if (esp->ops->irq_pending(esp)) { + esp->sreg = esp_read8(ESP_STATUS); + esp->ireg = esp_read8(ESP_INTRPT); + if (esp->ireg & ESP_INTR_FDONE) + break; + } + udelay(1); + } + if (i == ESP_RESELECT_TAG_LIMIT) { + printk(KERN_ERR PFX "esp%d: Reconnect IRQ2 timeout\n", + esp->host->unique_id); + return NULL; + } + esp->ops->dma_drain(esp); + esp->ops->dma_invalidate(esp); + + esp_log_reconnect("IRQ2(%d:%x:%x) tag[%x:%x]\n", + i, esp->ireg, esp->sreg, + esp->command_block[0], + esp->command_block[1]); + + if (esp->command_block[0] < SIMPLE_QUEUE_TAG || + esp->command_block[0] > ORDERED_QUEUE_TAG) { + printk(KERN_ERR PFX "esp%d: Reconnect, bad tag " + "type %02x.\n", + esp->host->unique_id, esp->command_block[0]); + return NULL; + } + + ent = lp->tagged_cmds[esp->command_block[1]]; + if (!ent) { + printk(KERN_ERR PFX "esp%d: Reconnect, no entry for " + "tag %02x.\n", + esp->host->unique_id, esp->command_block[1]); + return NULL; + } + + return ent; +} + +static int esp_reconnect(struct esp *esp) +{ + struct esp_cmd_entry *ent; + struct esp_target_data *tp; + struct esp_lun_data *lp; + struct scsi_device *dev; + int target, lun; + + BUG_ON(esp->active_cmd); + if (esp->rev == FASHME) { + /* FASHME puts the target and lun numbers directly + * into the fifo. + */ + target = esp->fifo[0]; + lun = esp->fifo[1] & 0x7; + } else { + u8 bits = esp_read8(ESP_FDATA); + + /* Older chips put the lun directly into the fifo, but + * the target is given as a sample of the arbitration + * lines on the bus at reselection time. So we should + * see the ID of the ESP and the one reconnecting target + * set in the bitmap. + */ + if (!(bits & esp->scsi_id_mask)) + goto do_reset; + bits &= ~esp->scsi_id_mask; + if (!bits || (bits & (bits - 1))) + goto do_reset; + + target = ffs(bits) - 1; + lun = (esp_read8(ESP_FDATA) & 0x7); + + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + if (esp->rev == ESP100) { + u8 ireg = esp_read8(ESP_INTRPT); + /* This chip has a bug during reselection that can + * cause a spurious illegal-command interrupt, which + * we simply ACK here. Another possibility is a bus + * reset so we must check for that. 
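A note on the pre-HME reselection decode in esp_reconnect() above: the fifo byte is a sample of the bus ID lines, so it must contain the initiator's own ID plus exactly one other bit; the bits & (bits - 1) test rejects anything but a single remaining bit, and ffs() turns that bit into a target number. A standalone sketch of the decode (mask values illustrative):

#include <stdio.h>
#include <strings.h>	/* ffs() */

static int decode_reselect_id(unsigned int bits, unsigned int my_mask)
{
	if (!(bits & my_mask))
		return -1;		/* we were not part of the reselection */
	bits &= ~my_mask;
	if (!bits || (bits & (bits - 1)))
		return -1;		/* zero bits, or more than one target */
	return ffs(bits) - 1;		/* bit index == SCSI target ID */
}

int main(void)
{
	/* Initiator at ID 7 (mask 0x80); target 3 (bit 0x08) reselects us. */
	printf("%d\n", decode_reselect_id(0x88, 0x80));	/* prints 3 */
	printf("%d\n", decode_reselect_id(0x8c, 0x80));	/* prints -1 */
	return 0;
}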
+ */ + if (ireg & ESP_INTR_SR) + goto do_reset; + } + scsi_esp_cmd(esp, ESP_CMD_NULL); + } + + esp_write_tgt_sync(esp, target); + esp_write_tgt_config3(esp, target); + + scsi_esp_cmd(esp, ESP_CMD_MOK); + + if (esp->rev == FASHME) + esp_write8(target | ESP_BUSID_RESELID | ESP_BUSID_CTR32BIT, + ESP_BUSID); + + tp = &esp->target[target]; + dev = __scsi_device_lookup_by_target(tp->starget, lun); + if (!dev) { + printk(KERN_ERR PFX "esp%d: Reconnect, no lp " + "tgt[%u] lun[%u]\n", + esp->host->unique_id, target, lun); + goto do_reset; + } + lp = dev->hostdata; + + ent = lp->non_tagged_cmd; + if (!ent) { + ent = esp_reconnect_with_tag(esp, lp); + if (!ent) + goto do_reset; + } + + esp->active_cmd = ent; + + if (ent->flags & ESP_CMD_FLAG_ABORT) { + esp->msg_out[0] = ABORT_TASK_SET; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); + } + + esp_event(esp, ESP_EVENT_CHECK_PHASE); + esp_restore_pointers(esp, ent); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + return 1; + +do_reset: + esp_schedule_reset(esp); + return 0; +} + +static int esp_finish_select(struct esp *esp) +{ + struct esp_cmd_entry *ent; + struct scsi_cmnd *cmd; + u8 orig_select_state; + + orig_select_state = esp->select_state; + + /* No longer selecting. */ + esp->select_state = ESP_SELECT_NONE; + + esp->seqreg = esp_read8(ESP_SSTEP) & ESP_STEP_VBITS; + ent = esp->active_cmd; + cmd = ent->cmd; + + if (esp->ops->dma_error(esp)) { + /* If we see a DMA error during or as a result of selection, + * all bets are off. + */ + esp_schedule_reset(esp); + esp_cmd_is_done(esp, ent, cmd, (DID_ERROR << 16)); + return 0; + } + + esp->ops->dma_invalidate(esp); + + if (esp->ireg == (ESP_INTR_RSEL | ESP_INTR_FDONE)) { + struct esp_target_data *tp = &esp->target[cmd->device->id]; + + /* Carefully back out of the selection attempt. Release + * resources (such as DMA mapping & TAG) and reset state (such + * as message out and command delivery variables). + */ + if (!(ent->flags & ESP_CMD_FLAG_AUTOSENSE)) { + esp_unmap_dma(esp, cmd); + esp_free_lun_tag(ent, cmd->device->hostdata); + tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_NEGO_WIDE); + esp->flags &= ~ESP_FLAG_DOING_SLOWCMD; + esp->cmd_bytes_ptr = NULL; + esp->cmd_bytes_left = 0; + } else { + esp->ops->unmap_single(esp, ent->sense_dma, + SCSI_SENSE_BUFFERSIZE, + DMA_FROM_DEVICE); + ent->sense_ptr = NULL; + } + + /* Now that the state is unwound properly, put back onto + * the issue queue. This command is no longer active. + */ + list_del(&ent->list); + list_add(&ent->list, &esp->queued_cmds); + esp->active_cmd = NULL; + + /* Return value ignored by caller, it directly invokes + * esp_reconnect(). + */ + return 0; + } + + if (esp->ireg == ESP_INTR_DC) { + struct scsi_device *dev = cmd->device; + + /* Disconnect. Make sure we re-negotiate sync and + * wide parameters if this target starts responding + * again in the future. + */ + esp->target[dev->id].flags |= ESP_TGT_CHECK_NEGO; + + scsi_esp_cmd(esp, ESP_CMD_ESEL); + esp_cmd_is_done(esp, ent, cmd, (DID_BAD_TARGET << 16)); + return 1; + } + + if (esp->ireg == (ESP_INTR_FDONE | ESP_INTR_BSERV)) { + /* Selection successful. On pre-FAST chips we have + * to do a NOP and possibly clean out the FIFO. + */ + if (esp->rev <= ESP236) { + int fcnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES; + + scsi_esp_cmd(esp, ESP_CMD_NULL); + + if (!fcnt && + (!esp->prev_soff || + ((esp->sreg & ESP_STAT_PMASK) != ESP_DIP))) + esp_flush_fifo(esp); + } + + /* If we are doing a slow command, negotiation, etc. + * we'll do the right thing as we transition to the + * next phase. 
+ */ + esp_event(esp, ESP_EVENT_CHECK_PHASE); + return 0; + } + + printk("ESP: Unexpected selection completion ireg[%x].\n", + esp->ireg); + esp_schedule_reset(esp); + return 0; +} + +static int esp_data_bytes_sent(struct esp *esp, struct esp_cmd_entry *ent, + struct scsi_cmnd *cmd) +{ + int fifo_cnt, ecount, bytes_sent, flush_fifo; + + fifo_cnt = esp_read8(ESP_FFLAGS) & ESP_FF_FBYTES; + if (esp->prev_cfg3 & ESP_CONFIG3_EWIDE) + fifo_cnt <<= 1; + + ecount = 0; + if (!(esp->sreg & ESP_STAT_TCNT)) { + ecount = ((unsigned int)esp_read8(ESP_TCLOW) | + (((unsigned int)esp_read8(ESP_TCMED)) << 8)); + if (esp->rev == FASHME) + ecount |= ((unsigned int)esp_read8(FAS_RLO)) << 16; + } + + bytes_sent = esp->data_dma_len; + bytes_sent -= ecount; + + if (!(ent->flags & ESP_CMD_FLAG_WRITE)) + bytes_sent -= fifo_cnt; + + flush_fifo = 0; + if (!esp->prev_soff) { + /* Synchronous data transfer, always flush fifo. */ + flush_fifo = 1; + } else { + if (esp->rev == ESP100) { + u32 fflags, phase; + + /* ESP100 has a chip bug where in the synchronous data + * phase it can mistake a final long REQ pulse from the + * target as an extra data byte. Fun. + * + * To detect this case we resample the status register + * and fifo flags. If we're still in a data phase and + * we see spurious chunks in the fifo, we return error + * to the caller which should reset and set things up + * such that we only try future transfers to this + * target in synchronous mode. + */ + esp->sreg = esp_read8(ESP_STATUS); + phase = esp->sreg & ESP_STAT_PMASK; + fflags = esp_read8(ESP_FFLAGS); + + if ((phase == ESP_DOP && + (fflags & ESP_FF_ONOTZERO)) || + (phase == ESP_DIP && + (fflags & ESP_FF_FBYTES))) + return -1; + } + if (!(ent->flags & ESP_CMD_FLAG_WRITE)) + flush_fifo = 1; + } + + if (flush_fifo) + esp_flush_fifo(esp); + + return bytes_sent; +} + +static void esp_setsync(struct esp *esp, struct esp_target_data *tp, + u8 scsi_period, u8 scsi_offset, + u8 esp_stp, u8 esp_soff) +{ + spi_period(tp->starget) = scsi_period; + spi_offset(tp->starget) = scsi_offset; + spi_width(tp->starget) = (tp->flags & ESP_TGT_WIDE) ? 
1 : 0; + + if (esp_soff) { + esp_stp &= 0x1f; + esp_soff |= esp->radelay; + if (esp->rev >= FAS236) { + u8 bit = ESP_CONFIG3_FSCSI; + if (esp->rev >= FAS100A) + bit = ESP_CONFIG3_FAST; + + if (scsi_period < 50) { + if (esp->rev == FASHME) + esp_soff &= ~esp->radelay; + tp->esp_config3 |= bit; + } else { + tp->esp_config3 &= ~bit; + } + esp->prev_cfg3 = tp->esp_config3; + esp_write8(esp->prev_cfg3, ESP_CFG3); + } + } + + tp->esp_period = esp->prev_stp = esp_stp; + tp->esp_offset = esp->prev_soff = esp_soff; + + esp_write8(esp_soff, ESP_SOFF); + esp_write8(esp_stp, ESP_STP); + + tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_CHECK_NEGO); + + spi_display_xfer_agreement(tp->starget); +} + +static void esp_msgin_reject(struct esp *esp) +{ + struct esp_cmd_entry *ent = esp->active_cmd; + struct scsi_cmnd *cmd = ent->cmd; + struct esp_target_data *tp; + int tgt; + + tgt = cmd->device->id; + tp = &esp->target[tgt]; + + if (tp->flags & ESP_TGT_NEGO_WIDE) { + tp->flags &= ~(ESP_TGT_NEGO_WIDE | ESP_TGT_WIDE); + + if (!esp_need_to_nego_sync(tp)) { + tp->flags &= ~ESP_TGT_CHECK_NEGO; + scsi_esp_cmd(esp, ESP_CMD_RATN); + } else { + esp->msg_out_len = + spi_populate_sync_msg(&esp->msg_out[0], + tp->nego_goal_period, + tp->nego_goal_offset); + tp->flags |= ESP_TGT_NEGO_SYNC; + scsi_esp_cmd(esp, ESP_CMD_SATN); + } + return; + } + + if (tp->flags & ESP_TGT_NEGO_SYNC) { + tp->flags &= ~(ESP_TGT_NEGO_SYNC | ESP_TGT_CHECK_NEGO); + tp->esp_period = 0; + tp->esp_offset = 0; + esp_setsync(esp, tp, 0, 0, 0, 0); + scsi_esp_cmd(esp, ESP_CMD_RATN); + return; + } + + esp->msg_out[0] = ABORT_TASK_SET; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); +} + +static void esp_msgin_sdtr(struct esp *esp, struct esp_target_data *tp) +{ + u8 period = esp->msg_in[3]; + u8 offset = esp->msg_in[4]; + u8 stp; + + if (!(tp->flags & ESP_TGT_NEGO_SYNC)) + goto do_reject; + + if (offset > 15) + goto do_reject; + + if (offset) { + int rounded_up, one_clock; + + if (period > esp->max_period) { + period = offset = 0; + goto do_sdtr; + } + if (period < esp->min_period) + goto do_reject; + + one_clock = esp->ccycle / 1000; + rounded_up = (period << 2); + rounded_up = (rounded_up + one_clock - 1) / one_clock; + stp = rounded_up; + if (stp && esp->rev >= FAS236) { + if (stp >= 50) + stp--; + } + } else { + stp = 0; + } + + esp_setsync(esp, tp, period, offset, stp, offset); + return; + +do_reject: + esp->msg_out[0] = MESSAGE_REJECT; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); + return; + +do_sdtr: + tp->nego_goal_period = period; + tp->nego_goal_offset = offset; + esp->msg_out_len = + spi_populate_sync_msg(&esp->msg_out[0], + tp->nego_goal_period, + tp->nego_goal_offset); + scsi_esp_cmd(esp, ESP_CMD_SATN); +} + +static void esp_msgin_wdtr(struct esp *esp, struct esp_target_data *tp) +{ + int size = 8 << esp->msg_in[3]; + u8 cfg3; + + if (esp->rev != FASHME) + goto do_reject; + + if (size != 8 && size != 16) + goto do_reject; + + if (!(tp->flags & ESP_TGT_NEGO_WIDE)) + goto do_reject; + + cfg3 = tp->esp_config3; + if (size == 16) { + tp->flags |= ESP_TGT_WIDE; + cfg3 |= ESP_CONFIG3_EWIDE; + } else { + tp->flags &= ~ESP_TGT_WIDE; + cfg3 &= ~ESP_CONFIG3_EWIDE; + } + tp->esp_config3 = cfg3; + esp->prev_cfg3 = cfg3; + esp_write8(cfg3, ESP_CFG3); + + tp->flags &= ~ESP_TGT_NEGO_WIDE; + + spi_period(tp->starget) = 0; + spi_offset(tp->starget) = 0; + if (!esp_need_to_nego_sync(tp)) { + tp->flags &= ~ESP_TGT_CHECK_NEGO; + scsi_esp_cmd(esp, ESP_CMD_RATN); + } else { + esp->msg_out_len = + spi_populate_sync_msg(&esp->msg_out[0], + 
tp->nego_goal_period, + tp->nego_goal_offset); + tp->flags |= ESP_TGT_NEGO_SYNC; + scsi_esp_cmd(esp, ESP_CMD_SATN); + } + return; + +do_reject: + esp->msg_out[0] = MESSAGE_REJECT; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); +} + +static void esp_msgin_extended(struct esp *esp) +{ + struct esp_cmd_entry *ent = esp->active_cmd; + struct scsi_cmnd *cmd = ent->cmd; + struct esp_target_data *tp; + int tgt = cmd->device->id; + + tp = &esp->target[tgt]; + if (esp->msg_in[2] == EXTENDED_SDTR) { + esp_msgin_sdtr(esp, tp); + return; + } + if (esp->msg_in[2] == EXTENDED_WDTR) { + esp_msgin_wdtr(esp, tp); + return; + } + + printk("ESP: Unexpected extended msg type %x\n", + esp->msg_in[2]); + + esp->msg_out[0] = ABORT_TASK_SET; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); +} + +/* Analyze msgin bytes received from target so far. Return non-zero + * if there are more bytes needed to complete the message. + */ +static int esp_msgin_process(struct esp *esp) +{ + u8 msg0 = esp->msg_in[0]; + int len = esp->msg_in_len; + + if (msg0 & 0x80) { + /* Identify */ + printk("ESP: Unexpected msgin identify\n"); + return 0; + } + + switch (msg0) { + case EXTENDED_MESSAGE: + if (len == 1) + return 1; + if (len < esp->msg_in[1] + 2) + return 1; + esp_msgin_extended(esp); + return 0; + + case IGNORE_WIDE_RESIDUE: { + struct esp_cmd_entry *ent; + struct esp_cmd_priv *spriv; + if (len == 1) + return 1; + + if (esp->msg_in[1] != 1) + goto do_reject; + + ent = esp->active_cmd; + spriv = ESP_CMD_PRIV(ent->cmd); + + if (spriv->cur_residue == sg_dma_len(spriv->cur_sg)) { + spriv->cur_sg--; + spriv->cur_residue = 1; + } else + spriv->cur_residue++; + spriv->tot_residue++; + return 0; + } + case NOP: + return 0; + case RESTORE_POINTERS: + esp_restore_pointers(esp, esp->active_cmd); + return 0; + case SAVE_POINTERS: + esp_save_pointers(esp, esp->active_cmd); + return 0; + + case COMMAND_COMPLETE: + case DISCONNECT: { + struct esp_cmd_entry *ent = esp->active_cmd; + + ent->message = msg0; + esp_event(esp, ESP_EVENT_FREE_BUS); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + return 0; + } + case MESSAGE_REJECT: + esp_msgin_reject(esp); + return 0; + + default: + do_reject: + esp->msg_out[0] = MESSAGE_REJECT; + esp->msg_out_len = 1; + scsi_esp_cmd(esp, ESP_CMD_SATN); + return 0; + } +} + +static int esp_process_event(struct esp *esp) +{ + int write; + +again: + write = 0; + switch (esp->event) { + case ESP_EVENT_CHECK_PHASE: + switch (esp->sreg & ESP_STAT_PMASK) { + case ESP_DOP: + esp_event(esp, ESP_EVENT_DATA_OUT); + break; + case ESP_DIP: + esp_event(esp, ESP_EVENT_DATA_IN); + break; + case ESP_STATP: + esp_flush_fifo(esp); + scsi_esp_cmd(esp, ESP_CMD_ICCSEQ); + esp_event(esp, ESP_EVENT_STATUS); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + return 1; + + case ESP_MOP: + esp_event(esp, ESP_EVENT_MSGOUT); + break; + + case ESP_MIP: + esp_event(esp, ESP_EVENT_MSGIN); + break; + + case ESP_CMDP: + esp_event(esp, ESP_EVENT_CMD_START); + break; + + default: + printk("ESP: Unexpected phase, sreg=%02x\n", + esp->sreg); + esp_schedule_reset(esp); + return 0; + } + goto again; + break; + + case ESP_EVENT_DATA_IN: + write = 1; + /* fallthru */ + + case ESP_EVENT_DATA_OUT: { + struct esp_cmd_entry *ent = esp->active_cmd; + struct scsi_cmnd *cmd = ent->cmd; + dma_addr_t dma_addr = esp_cur_dma_addr(ent, cmd); + unsigned int dma_len = esp_cur_dma_len(ent, cmd); + + if (esp->rev == ESP100) + scsi_esp_cmd(esp, ESP_CMD_NULL); + + if (write) + ent->flags |= ESP_CMD_FLAG_WRITE; + else + ent->flags &= ~ESP_CMD_FLAG_WRITE; + + 
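+			/* esp_dma_length_limit() is defined earlier in this
+			 * file; broadly, it trims the segment to a length the
+			 * chip can count -- the 16-bit ESP_TCLOW/ESP_TCMED
+			 * pair on most revisions, extended via FAS_RLO on
+			 * FASHME as read back in esp_data_bytes_sent() --
+			 * and that the DMA engine can address.
+			 */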
dma_len = esp_dma_length_limit(esp, dma_addr, dma_len); + esp->data_dma_len = dma_len; + + if (!dma_len) { + printk(KERN_ERR PFX "esp%d: DMA length is zero!\n", + esp->host->unique_id); + printk(KERN_ERR PFX "esp%d: cur adr[%08x] len[%08x]\n", + esp->host->unique_id, + esp_cur_dma_addr(ent, cmd), + esp_cur_dma_len(ent, cmd)); + esp_schedule_reset(esp); + return 0; + } + + esp_log_datastart("ESP: start data addr[%08x] len[%u] " + "write(%d)\n", + dma_addr, dma_len, write); + + esp->ops->send_dma_cmd(esp, dma_addr, dma_len, dma_len, + write, ESP_CMD_DMA | ESP_CMD_TI); + esp_event(esp, ESP_EVENT_DATA_DONE); + break; + } + case ESP_EVENT_DATA_DONE: { + struct esp_cmd_entry *ent = esp->active_cmd; + struct scsi_cmnd *cmd = ent->cmd; + int bytes_sent; + + if (esp->ops->dma_error(esp)) { + printk("ESP: data done, DMA error, resetting\n"); + esp_schedule_reset(esp); + return 0; + } + + if (ent->flags & ESP_CMD_FLAG_WRITE) { + /* XXX parity errors, etc. XXX */ + + esp->ops->dma_drain(esp); + } + esp->ops->dma_invalidate(esp); + + if (esp->ireg != ESP_INTR_BSERV) { + /* We should always see exactly a bus-service + * interrupt at the end of a successful transfer. + */ + printk("ESP: data done, not BSERV, resetting\n"); + esp_schedule_reset(esp); + return 0; + } + + bytes_sent = esp_data_bytes_sent(esp, ent, cmd); + + esp_log_datadone("ESP: data done flgs[%x] sent[%d]\n", + ent->flags, bytes_sent); + + if (bytes_sent < 0) { + /* XXX force sync mode for this target XXX */ + esp_schedule_reset(esp); + return 0; + } + + esp_advance_dma(esp, ent, cmd, bytes_sent); + esp_event(esp, ESP_EVENT_CHECK_PHASE); + goto again; + break; + } + + case ESP_EVENT_STATUS: { + struct esp_cmd_entry *ent = esp->active_cmd; + + if (esp->ireg & ESP_INTR_FDONE) { + ent->status = esp_read8(ESP_FDATA); + ent->message = esp_read8(ESP_FDATA); + scsi_esp_cmd(esp, ESP_CMD_MOK); + } else if (esp->ireg == ESP_INTR_BSERV) { + ent->status = esp_read8(ESP_FDATA); + ent->message = 0xff; + esp_event(esp, ESP_EVENT_MSGIN); + return 0; + } + + if (ent->message != COMMAND_COMPLETE) { + printk("ESP: Unexpected message %x in status\n", + ent->message); + esp_schedule_reset(esp); + return 0; + } + + esp_event(esp, ESP_EVENT_FREE_BUS); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + break; + } + case ESP_EVENT_FREE_BUS: { + struct esp_cmd_entry *ent = esp->active_cmd; + struct scsi_cmnd *cmd = ent->cmd; + + if (ent->message == COMMAND_COMPLETE || + ent->message == DISCONNECT) + scsi_esp_cmd(esp, ESP_CMD_ESEL); + + if (ent->message == COMMAND_COMPLETE) { + esp_log_cmddone("ESP: Command done status[%x] " + "message[%x]\n", + ent->status, ent->message); + if (ent->status == SAM_STAT_TASK_SET_FULL) + esp_event_queue_full(esp, ent); + + if (ent->status == SAM_STAT_CHECK_CONDITION && + !(ent->flags & ESP_CMD_FLAG_AUTOSENSE)) { + ent->flags |= ESP_CMD_FLAG_AUTOSENSE; + esp_autosense(esp, ent); + } else { + esp_cmd_is_done(esp, ent, cmd, + compose_result(ent->status, + ent->message, + DID_OK)); + } + } else if (ent->message == DISCONNECT) { + esp_log_disconnect("ESP: Disconnecting tgt[%d] " + "tag[%x:%x]\n", + cmd->device->id, + ent->tag[0], ent->tag[1]); + + esp->active_cmd = NULL; + esp_maybe_execute_command(esp); + } else { + printk("ESP: Unexpected message %x in freebus\n", + ent->message); + esp_schedule_reset(esp); + return 0; + } + if (esp->active_cmd) + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + break; + } + case ESP_EVENT_MSGOUT: { + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + + if (esp_debug & ESP_DEBUG_MSGOUT) { + int i; + printk("ESP: Sending message [ "); 
+ for (i = 0; i < esp->msg_out_len; i++) + printk("%02x ", esp->msg_out[i]); + printk("]\n"); + } + + if (esp->rev == FASHME) { + int i; + + /* Always use the fifo. */ + for (i = 0; i < esp->msg_out_len; i++) { + esp_write8(esp->msg_out[i], ESP_FDATA); + esp_write8(0, ESP_FDATA); + } + scsi_esp_cmd(esp, ESP_CMD_TI); + } else { + if (esp->msg_out_len == 1) { + esp_write8(esp->msg_out[0], ESP_FDATA); + scsi_esp_cmd(esp, ESP_CMD_TI); + } else { + /* Use DMA. */ + memcpy(esp->command_block, + esp->msg_out, + esp->msg_out_len); + + esp->ops->send_dma_cmd(esp, + esp->command_block_dma, + esp->msg_out_len, + esp->msg_out_len, + 0, + ESP_CMD_DMA|ESP_CMD_TI); + } + } + esp_event(esp, ESP_EVENT_MSGOUT_DONE); + break; + } + case ESP_EVENT_MSGOUT_DONE: + if (esp->rev == FASHME) { + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + } else { + if (esp->msg_out_len > 1) + esp->ops->dma_invalidate(esp); + } + + if (!(esp->ireg & ESP_INTR_DC)) { + if (esp->rev != FASHME) + scsi_esp_cmd(esp, ESP_CMD_NULL); + } + esp_event(esp, ESP_EVENT_CHECK_PHASE); + goto again; + case ESP_EVENT_MSGIN: + if (esp->ireg & ESP_INTR_BSERV) { + if (esp->rev == FASHME) { + if (!(esp_read8(ESP_STATUS2) & + ESP_STAT2_FEMPTY)) + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + } else { + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + if (esp->rev == ESP100) + scsi_esp_cmd(esp, ESP_CMD_NULL); + } + scsi_esp_cmd(esp, ESP_CMD_TI); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + return 1; + } + if (esp->ireg & ESP_INTR_FDONE) { + u8 val; + + if (esp->rev == FASHME) + val = esp->fifo[0]; + else + val = esp_read8(ESP_FDATA); + esp->msg_in[esp->msg_in_len++] = val; + + esp_log_msgin("ESP: Got msgin byte %x\n", val); + + if (!esp_msgin_process(esp)) + esp->msg_in_len = 0; + + if (esp->rev == FASHME) + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + + scsi_esp_cmd(esp, ESP_CMD_MOK); + + if (esp->event != ESP_EVENT_FREE_BUS) + esp_event(esp, ESP_EVENT_CHECK_PHASE); + } else { + printk("ESP: MSGIN neither BSERV nor FDONE, resetting\n"); + esp_schedule_reset(esp); + return 0; + } + break; + case ESP_EVENT_CMD_START: + memcpy(esp->command_block, esp->cmd_bytes_ptr, + esp->cmd_bytes_left); + if (esp->rev == FASHME) + scsi_esp_cmd(esp, ESP_CMD_FLUSH); + esp->ops->send_dma_cmd(esp, esp->command_block_dma, + esp->cmd_bytes_left, 16, 0, + ESP_CMD_DMA | ESP_CMD_TI); + esp_event(esp, ESP_EVENT_CMD_DONE); + esp->flags |= ESP_FLAG_QUICKIRQ_CHECK; + break; + case ESP_EVENT_CMD_DONE: + esp->ops->dma_invalidate(esp); + if (esp->ireg & ESP_INTR_BSERV) { + esp_event(esp, ESP_EVENT_CHECK_PHASE); + goto again; + } + esp_schedule_reset(esp); + return 0; + break; + + case ESP_EVENT_RESET: + scsi_esp_cmd(esp, ESP_CMD_RS); + break; + + default: + printk("ESP: Unexpected event %x, resetting\n", + esp->event); + esp_schedule_reset(esp); + return 0; + break; + } + return 1; +} + +static void esp_reset_cleanup_one(struct esp *esp, struct esp_cmd_entry *ent) +{ + struct scsi_cmnd *cmd = ent->cmd; + + esp_unmap_dma(esp, cmd); + esp_free_lun_tag(ent, cmd->device->hostdata); + cmd->result = DID_RESET << 16; + + if (ent->flags & ESP_CMD_FLAG_AUTOSENSE) { + esp->ops->unmap_single(esp, ent->sense_dma, + SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + ent->sense_ptr = NULL; + } + + cmd->scsi_done(cmd); + list_del(&ent->list); + esp_put_ent(esp, ent); +} + +static void esp_clear_hold(struct scsi_device *dev, void *data) +{ + struct esp_lun_data *lp = dev->hostdata; + + BUG_ON(lp->num_tagged); + lp->hold = 0; +} + +static void esp_reset_cleanup(struct esp *esp) +{ + struct esp_cmd_entry *ent, *tmp; + int i; + +
list_for_each_entry_safe(ent, tmp, &esp->queued_cmds, list) { + struct scsi_cmnd *cmd = ent->cmd; + + list_del(&ent->list); + cmd->result = DID_RESET << 16; + cmd->scsi_done(cmd); + esp_put_ent(esp, ent); + } + + list_for_each_entry_safe(ent, tmp, &esp->active_cmds, list) { + if (ent == esp->active_cmd) + esp->active_cmd = NULL; + esp_reset_cleanup_one(esp, ent); + } + + BUG_ON(esp->active_cmd != NULL); + + /* Force renegotiation of sync/wide transfers. */ + for (i = 0; i < ESP_MAX_TARGET; i++) { + struct esp_target_data *tp = &esp->target[i]; + + tp->esp_period = 0; + tp->esp_offset = 0; + tp->esp_config3 &= ~(ESP_CONFIG3_EWIDE | + ESP_CONFIG3_FSCSI | + ESP_CONFIG3_FAST); + tp->flags &= ~ESP_TGT_WIDE; + tp->flags |= ESP_TGT_CHECK_NEGO; + + if (tp->starget) + starget_for_each_device(tp->starget, NULL, + esp_clear_hold); + } +} + +/* Runs under host->lock */ +static void __esp_interrupt(struct esp *esp) +{ + int finish_reset, intr_done; + u8 phase; + + esp->sreg = esp_read8(ESP_STATUS); + + if (esp->flags & ESP_FLAG_RESETTING) { + finish_reset = 1; + } else { + if (esp_check_gross_error(esp)) + return; + + finish_reset = esp_check_spur_intr(esp); + if (finish_reset < 0) + return; + } + + esp->ireg = esp_read8(ESP_INTRPT); + + if (esp->ireg & ESP_INTR_SR) + finish_reset = 1; + + if (finish_reset) { + esp_reset_cleanup(esp); + if (esp->eh_reset) { + complete(esp->eh_reset); + esp->eh_reset = NULL; + } + return; + } + + phase = (esp->sreg & ESP_STAT_PMASK); + if (esp->rev == FASHME) { + if (((phase != ESP_DIP && phase != ESP_DOP) && + esp->select_state == ESP_SELECT_NONE && + esp->event != ESP_EVENT_STATUS && + esp->event != ESP_EVENT_DATA_DONE) || + (esp->ireg & ESP_INTR_RSEL)) { + esp->sreg2 = esp_read8(ESP_STATUS2); + if (!(esp->sreg2 & ESP_STAT2_FEMPTY) || + (esp->sreg2 & ESP_STAT2_F1BYTE)) + hme_read_fifo(esp); + } + } + + esp_log_intr("ESP: intr sreg[%02x] seqreg[%02x] " + "sreg2[%02x] ireg[%02x]\n", + esp->sreg, esp->seqreg, esp->sreg2, esp->ireg); + + intr_done = 0; + + if (esp->ireg & (ESP_INTR_S | ESP_INTR_SATN | ESP_INTR_IC)) { + printk("ESP: unexpected IREG %02x\n", esp->ireg); + if (esp->ireg & ESP_INTR_IC) + esp_dump_cmd_log(esp); + + esp_schedule_reset(esp); + } else { + if (!(esp->ireg & ESP_INTR_RSEL)) { + /* Some combination of FDONE, BSERV, DC. 
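+		 * If one of our own selections is still in flight,
+		 * esp_finish_select() works out how far it got;
+		 * ESP_INTR_RSEL instead means a target won arbitration
+		 * and reselected us, which goes through esp_reconnect().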
*/ + if (esp->select_state != ESP_SELECT_NONE) + intr_done = esp_finish_select(esp); + } else if (esp->ireg & ESP_INTR_RSEL) { + if (esp->active_cmd) + (void) esp_finish_select(esp); + intr_done = esp_reconnect(esp); + } + } + while (!intr_done) + intr_done = esp_process_event(esp); +} + +irqreturn_t scsi_esp_intr(int irq, void *dev_id) +{ + struct esp *esp = dev_id; + unsigned long flags; + irqreturn_t ret; + + spin_lock_irqsave(esp->host->host_lock, flags); + ret = IRQ_NONE; + if (esp->ops->irq_pending(esp)) { + ret = IRQ_HANDLED; + for (;;) { + int i; + + __esp_interrupt(esp); + if (!(esp->flags & ESP_FLAG_QUICKIRQ_CHECK)) + break; + esp->flags &= ~ESP_FLAG_QUICKIRQ_CHECK; + + for (i = 0; i < ESP_QUICKIRQ_LIMIT; i++) { + if (esp->ops->irq_pending(esp)) + break; + } + if (i == ESP_QUICKIRQ_LIMIT) + break; + } + } + spin_unlock_irqrestore(esp->host->host_lock, flags); + + return ret; +} +EXPORT_SYMBOL(scsi_esp_intr); + +static void __devinit esp_get_revision(struct esp *esp) +{ + u8 val; + + esp->config1 = (ESP_CONFIG1_PENABLE | (esp->scsi_id & 7)); + esp->config2 = (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY); + esp_write8(esp->config2, ESP_CFG2); + + val = esp_read8(ESP_CFG2); + val &= ~ESP_CONFIG2_MAGIC; + if (val != (ESP_CONFIG2_SCSI2ENAB | ESP_CONFIG2_REGPARITY)) { + /* If what we write to cfg2 does not come back, cfg2 is not + * implemented, therefore this must be a plain esp100. + */ + esp->rev = ESP100; + } else { + esp->config2 = 0; + esp_set_all_config3(esp, 5); + esp->prev_cfg3 = 5; + esp_write8(esp->config2, ESP_CFG2); + esp_write8(0, ESP_CFG3); + esp_write8(esp->prev_cfg3, ESP_CFG3); + + val = esp_read8(ESP_CFG3); + if (val != 5) { + /* The cfg2 register is implemented, however + * cfg3 is not, must be esp100a. + */ + esp->rev = ESP100A; + } else { + esp_set_all_config3(esp, 0); + esp->prev_cfg3 = 0; + esp_write8(esp->prev_cfg3, ESP_CFG3); + + /* All of cfg{1,2,3} implemented, must be one of + * the fas variants, figure out which one. + */ + if (esp->cfact == 0 || esp->cfact > ESP_CCF_F5) { + esp->rev = FAST; + esp->sync_defp = SYNC_DEFP_FAST; + } else { + esp->rev = ESP236; + } + esp->config2 = 0; + esp_write8(esp->config2, ESP_CFG2); + } + } +} + +static void __devinit esp_init_swstate(struct esp *esp) +{ + int i; + + INIT_LIST_HEAD(&esp->queued_cmds); + INIT_LIST_HEAD(&esp->active_cmds); + INIT_LIST_HEAD(&esp->esp_cmd_pool); + + /* Start with a clear state, domain validation (via ->slave_configure, + * spi_dv_device()) will attempt to enable SYNC, WIDE, and tagged + * commands. + */ + for (i = 0 ; i < ESP_MAX_TARGET; i++) { + esp->target[i].flags = 0; + esp->target[i].nego_goal_period = 0; + esp->target[i].nego_goal_offset = 0; + esp->target[i].nego_goal_width = 0; + esp->target[i].nego_goal_tags = 0; + } +} + +/* This places the ESP into a known state at boot time. */ +static void __devinit esp_bootup_reset(struct esp *esp) +{ + u8 val; + + /* Reset the DMA */ + esp->ops->reset_dma(esp); + + /* Reset the ESP */ + esp_reset_esp(esp); + + /* Reset the SCSI bus, but tell ESP not to generate an irq */ + val = esp_read8(ESP_CFG1); + val |= ESP_CONFIG1_SRRDISAB; + esp_write8(val, ESP_CFG1); + + scsi_esp_cmd(esp, ESP_CMD_RS); + udelay(400); + + esp_write8(esp->config1, ESP_CFG1); + + /* Eat any bitrot in the chip and we are done... */ + esp_read8(ESP_INTRPT); +} + +static void __devinit esp_set_clock_params(struct esp *esp) +{ + int fmhz; + u8 ccf; + + /* This is getting messy but it has to be done correctly or else + * you get weird behavior all over the place. 
We are trying to + * basically figure out three pieces of information. + * + * a) Clock Conversion Factor + * + * This is a representation of the input crystal clock frequency + * going into the ESP on this machine. Any operation whose timing + * is longer than 400ns depends on this value being correct. For + * example, you'll get blips for arbitration/selection during high + * load or with multiple targets if this is not set correctly. + * + * b) Selection Time-Out + * + * The ESP isn't very bright and will arbitrate for the bus and try + * to select a target forever if you let it. This value tells the + * ESP when it has taken too long to negotiate and that it should + * interrupt the CPU so we can see what happened. The value is + * computed as follows (from NCR/Symbios chip docs). + * + * (Time Out Period) * (Input Clock) + * STO = ---------------------------------- + * (8192) * (Clock Conversion Factor) + * + * We use a time out period of 250ms (ESP_BUS_TIMEOUT). + * + * c) Empirical constants for synchronous offset and transfer period + * register values + * + * This entails the smallest and largest sync period we could ever + * handle on this ESP. + */ + fmhz = esp->cfreq; + + ccf = ((fmhz / 1000000) + 4) / 5; + if (ccf == 1) + ccf = 2; + + /* If we can't find anything reasonable, just assume 20MHz. + * This is the clock frequency of the older sun4c's where I've + * been unable to find the clock-frequency PROM property. All + * other machines provide useful values it seems. + */ + if (fmhz <= 5000000 || ccf < 1 || ccf > 8) { + fmhz = 20000000; + ccf = 4; + } + + esp->cfact = (ccf == 8 ? 0 : ccf); + esp->cfreq = fmhz; + esp->ccycle = ESP_MHZ_TO_CYCLE(fmhz); + esp->ctick = ESP_TICK(ccf, esp->ccycle); + esp->neg_defp = ESP_NEG_DEFP(fmhz, ccf); + esp->sync_defp = SYNC_DEFP_SLOW; +} + +static const char *esp_chip_names[] = { + "ESP100", + "ESP100A", + "ESP236", + "FAS236", + "FAS100A", + "FAST", + "FASHME", +}; + +static struct scsi_transport_template *esp_transport_template; + +int __devinit scsi_esp_register(struct esp *esp, struct device *dev) +{ + static int instance; + int err; + + esp->host->transportt = esp_transport_template; + esp->host->max_lun = ESP_MAX_LUN; + esp->host->cmd_per_lun = 2; + + esp_set_clock_params(esp); + + esp_get_revision(esp); + + esp_init_swstate(esp); + + esp_bootup_reset(esp); + + printk(KERN_INFO PFX "esp%u, regs[%1p:%1p] irq[%u]\n", + esp->host->unique_id, esp->regs, esp->dma_regs, + esp->host->irq); + printk(KERN_INFO PFX "esp%u is a %s, %u MHz (ccf=%u), SCSI ID %u\n", + esp->host->unique_id, esp_chip_names[esp->rev], + esp->cfreq / 1000000, esp->cfact, esp->scsi_id); + + /* Let the SCSI bus reset settle.
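+	 *
+	 * (A worked check of the "MHz (ccf=...)" banner above, using
+	 * the 20MHz fallback from esp_set_clock_params(): ccf =
+	 * ((20000000 / 1000000) + 4) / 5 = 4, ccycle =
+	 * ESP_MHZ_TO_CYCLE(20000000) = 50000 (a 50ns clock period in
+	 * picoseconds), and neg_defp = ESP_NEG_DEFP(20000000, 4) =
+	 * (250 * 20000) / (8192 * 4) = 5000000 / 32768 = 152, the
+	 * register value for the intended 250ms ESP_BUS_TIMEOUT.)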
*/ + ssleep(esp_bus_reset_settle); + + err = scsi_add_host(esp->host, dev); + if (err) + return err; + + esp->host->unique_id = instance++; + + scsi_scan_host(esp->host); + + return 0; +} +EXPORT_SYMBOL(scsi_esp_register); + +void __devexit scsi_esp_unregister(struct esp *esp) +{ + scsi_remove_host(esp->host); +} +EXPORT_SYMBOL(scsi_esp_unregister); + +static int esp_slave_alloc(struct scsi_device *dev) +{ + struct esp *esp = host_to_esp(dev->host); + struct esp_target_data *tp = &esp->target[dev->id]; + struct esp_lun_data *lp; + + lp = kzalloc(sizeof(*lp), GFP_KERNEL); + if (!lp) + return -ENOMEM; + dev->hostdata = lp; + + tp->starget = dev->sdev_target; + + spi_min_period(tp->starget) = esp->min_period; + spi_max_offset(tp->starget) = 15; + + if (esp->flags & ESP_FLAG_WIDE_CAPABLE) + spi_max_width(tp->starget) = 1; + else + spi_max_width(tp->starget) = 0; + + return 0; +} + +static int esp_slave_configure(struct scsi_device *dev) +{ + struct esp *esp = host_to_esp(dev->host); + struct esp_target_data *tp = &esp->target[dev->id]; + int goal_tags, queue_depth; + + goal_tags = 0; + + if (dev->tagged_supported) { + /* XXX make this configurable somehow XXX */ + goal_tags = ESP_DEFAULT_TAGS; + + if (goal_tags > ESP_MAX_TAG) + goal_tags = ESP_MAX_TAG; + } + + queue_depth = goal_tags; + if (queue_depth < dev->host->cmd_per_lun) + queue_depth = dev->host->cmd_per_lun; + + if (goal_tags) { + scsi_set_tag_type(dev, MSG_ORDERED_TAG); + scsi_activate_tcq(dev, queue_depth); + } else { + scsi_deactivate_tcq(dev, queue_depth); + } + tp->flags |= ESP_TGT_DISCONNECT; + + if (!spi_initial_dv(dev->sdev_target)) + spi_dv_device(dev); + + return 0; +} + +static void esp_slave_destroy(struct scsi_device *dev) +{ + struct esp_lun_data *lp = dev->hostdata; + + kfree(lp); + dev->hostdata = NULL; +} + +static int esp_eh_abort_handler(struct scsi_cmnd *cmd) +{ + struct esp *esp = host_to_esp(cmd->device->host); + struct esp_cmd_entry *ent, *tmp; + struct completion eh_done; + unsigned long flags; + + /* XXX This helps a lot with debugging but might be a bit + * XXX much for the final driver. + */ + spin_lock_irqsave(esp->host->host_lock, flags); + printk(KERN_ERR PFX "esp%d: Aborting command [%p:%02x]\n", + esp->host->unique_id, cmd, cmd->cmnd[0]); + ent = esp->active_cmd; + if (ent) + printk(KERN_ERR PFX "esp%d: Current command [%p:%02x]\n", + esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]); + list_for_each_entry(ent, &esp->queued_cmds, list) { + printk(KERN_ERR PFX "esp%d: Queued command [%p:%02x]\n", + esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]); + } + list_for_each_entry(ent, &esp->active_cmds, list) { + printk(KERN_ERR PFX "esp%d: Active command [%p:%02x]\n", + esp->host->unique_id, ent->cmd, ent->cmd->cmnd[0]); + } + esp_dump_cmd_log(esp); + spin_unlock_irqrestore(esp->host->host_lock, flags); + + spin_lock_irqsave(esp->host->host_lock, flags); + + ent = NULL; + list_for_each_entry(tmp, &esp->queued_cmds, list) { + if (tmp->cmd == cmd) { + ent = tmp; + break; + } + } + + if (ent) { + /* Easiest case, we didn't even issue the command + * yet so it is trivial to abort. + */ + list_del(&ent->list); + + cmd->result = DID_ABORT << 16; + cmd->scsi_done(cmd); + + esp_put_ent(esp, ent); + + goto out_success; + } + + init_completion(&eh_done); + + ent = esp->active_cmd; + if (ent && ent->cmd == cmd) { + /* Command is the currently active command on + * the bus. If we already have an output message + * pending, no dice. 
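+		 * (There is only the single esp->msg_out[] buffer, so
+		 * staging ABORT_TASK_SET here would clobber the message
+		 * that is already queued.)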
+ */ + if (esp->msg_out_len) + goto out_failure; + + /* Send out an abort, encouraging the target to + * go to MSGOUT phase by asserting ATN. + */ + esp->msg_out[0] = ABORT_TASK_SET; + esp->msg_out_len = 1; + ent->eh_done = &eh_done; + + scsi_esp_cmd(esp, ESP_CMD_SATN); + } else { + /* The command is disconnected. This is not easy to + * abort. For now we fail and let the scsi error + * handling layer go try a scsi bus reset or host + * reset. + * + * What we could do is put together a scsi command + * solely for the purpose of sending an abort message + * to the target. Coming up with all the code to + * cook up scsi commands, special case them everywhere, + * etc. is for questionable gain and it would be better + * if the generic scsi error handling layer could do at + * least some of that for us. + * + * Anyways this is an area for potential future improvement + * in this driver. + */ + goto out_failure; + } + + spin_unlock_irqrestore(esp->host->host_lock, flags); + + if (!wait_for_completion_timeout(&eh_done, 5 * HZ)) { + spin_lock_irqsave(esp->host->host_lock, flags); + ent->eh_done = NULL; + spin_unlock_irqrestore(esp->host->host_lock, flags); + + return FAILED; + } + + return SUCCESS; + +out_success: + spin_unlock_irqrestore(esp->host->host_lock, flags); + return SUCCESS; + +out_failure: + /* XXX This might be a good location to set ESP_TGT_BROKEN + * XXX since we know which target/lun in particular is + * XXX causing trouble. + */ + spin_unlock_irqrestore(esp->host->host_lock, flags); + return FAILED; +} + +static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd) +{ + struct esp *esp = host_to_esp(cmd->device->host); + struct completion eh_reset; + unsigned long flags; + + init_completion(&eh_reset); + + spin_lock_irqsave(esp->host->host_lock, flags); + + esp->eh_reset = &eh_reset; + + /* XXX This is too simple... We should add lots of + * XXX checks here so that if we find that the chip is + * XXX very wedged we return failure immediately so + * XXX that we can perform a full chip reset. + */ + esp->flags |= ESP_FLAG_RESETTING; + scsi_esp_cmd(esp, ESP_CMD_RS); + + spin_unlock_irqrestore(esp->host->host_lock, flags); + + ssleep(esp_bus_reset_settle); + + if (!wait_for_completion_timeout(&eh_reset, 5 * HZ)) { + spin_lock_irqsave(esp->host->host_lock, flags); + esp->eh_reset = NULL; + spin_unlock_irqrestore(esp->host->host_lock, flags); + + return FAILED; + } + + return SUCCESS; +} + +/* All bets are off, reset the entire device. 
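+ * The midlayer only escalates to this handler once the abort and bus
+ * reset handlers above have failed, so the remaining option is the
+ * full esp_bootup_reset() reinitialization.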
*/ +static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd) +{ + struct esp *esp = host_to_esp(cmd->device->host); + unsigned long flags; + + spin_lock_irqsave(esp->host->host_lock, flags); + esp_bootup_reset(esp); + esp_reset_cleanup(esp); + spin_unlock_irqrestore(esp->host->host_lock, flags); + + ssleep(esp_bus_reset_settle); + + return SUCCESS; +} + +static const char *esp_info(struct Scsi_Host *host) +{ + return "esp"; +} + +struct scsi_host_template scsi_esp_template = { + .module = THIS_MODULE, + .name = "esp", + .info = esp_info, + .queuecommand = esp_queuecommand, + .slave_alloc = esp_slave_alloc, + .slave_configure = esp_slave_configure, + .slave_destroy = esp_slave_destroy, + .eh_abort_handler = esp_eh_abort_handler, + .eh_bus_reset_handler = esp_eh_bus_reset_handler, + .eh_host_reset_handler = esp_eh_host_reset_handler, + .can_queue = 7, + .this_id = 7, + .sg_tablesize = SG_ALL, + .use_clustering = ENABLE_CLUSTERING, + .max_sectors = 0xffff, + .skip_settle_delay = 1, +}; +EXPORT_SYMBOL(scsi_esp_template); + +static void esp_get_signalling(struct Scsi_Host *host) +{ + struct esp *esp = host_to_esp(host); + enum spi_signal_type type; + + if (esp->flags & ESP_FLAG_DIFFERENTIAL) + type = SPI_SIGNAL_HVD; + else + type = SPI_SIGNAL_SE; + + spi_signalling(host) = type; +} + +static void esp_set_offset(struct scsi_target *target, int offset) +{ + struct Scsi_Host *host = dev_to_shost(target->dev.parent); + struct esp *esp = host_to_esp(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_offset = offset; + tp->flags |= ESP_TGT_CHECK_NEGO; +} + +static void esp_set_period(struct scsi_target *target, int period) +{ + struct Scsi_Host *host = dev_to_shost(target->dev.parent); + struct esp *esp = host_to_esp(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_period = period; + tp->flags |= ESP_TGT_CHECK_NEGO; +} + +static void esp_set_width(struct scsi_target *target, int width) +{ + struct Scsi_Host *host = dev_to_shost(target->dev.parent); + struct esp *esp = host_to_esp(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_width = (width ? 1 : 0); + tp->flags |= ESP_TGT_CHECK_NEGO; +} + +static struct spi_function_template esp_transport_ops = { + .set_offset = esp_set_offset, + .show_offset = 1, + .set_period = esp_set_period, + .show_period = 1, + .set_width = esp_set_width, + .show_width = 1, + .get_signalling = esp_get_signalling, +}; + +static int __init esp_init(void) +{ + BUILD_BUG_ON(sizeof(struct scsi_pointer) < + sizeof(struct esp_cmd_priv)); + + esp_transport_template = spi_attach_transport(&esp_transport_ops); + if (!esp_transport_template) + return -ENODEV; + + return 0; +} + +static void __exit esp_exit(void) +{ + spi_release_transport(esp_transport_template); +} + +MODULE_DESCRIPTION("ESP SCSI driver core"); +MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_param(esp_bus_reset_settle, int, 0); +MODULE_PARM_DESC(esp_bus_reset_settle, + "ESP scsi bus reset delay in seconds"); + +module_param(esp_debug, int, 0); +MODULE_PARM_DESC(esp_debug, +"ESP bitmapped debugging message enable value:\n" +" 0x00000001 Log interrupt events\n" +" 0x00000002 Log scsi commands\n" +" 0x00000004 Log resets\n" +" 0x00000008 Log message in events\n" +" 0x00000010 Log message out events\n" +" 0x00000020 Log command completion\n" +" 0x00000040 Log disconnects\n" +" 0x00000080 Log data start\n" +" 0x00000100 Log data done\n" +" 0x00000200 Log reconnects\n" +" 0x00000400 Log auto-sense data\n" +); + +module_init(esp_init); +module_exit(esp_exit); diff --git a/drivers/scsi/esp_scsi.h b/drivers/scsi/esp_scsi.h new file mode 100644 index 00000000000..8d4a6690401 --- /dev/null +++ b/drivers/scsi/esp_scsi.h @@ -0,0 +1,560 @@ +/* esp_scsi.h: Defines and structures for the ESP driver. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#ifndef _ESP_SCSI_H +#define _ESP_SCSI_H + + /* Access Description Offset */ +#define ESP_TCLOW 0x00UL /* rw Low bits transfer count 0x00 */ +#define ESP_TCMED 0x01UL /* rw Mid bits transfer count 0x04 */ +#define ESP_FDATA 0x02UL /* rw FIFO data bits 0x08 */ +#define ESP_CMD 0x03UL /* rw SCSI command bits 0x0c */ +#define ESP_STATUS 0x04UL /* ro ESP status register 0x10 */ +#define ESP_BUSID ESP_STATUS /* wo BusID for sel/resel 0x10 */ +#define ESP_INTRPT 0x05UL /* ro Kind of interrupt 0x14 */ +#define ESP_TIMEO ESP_INTRPT /* wo Timeout for sel/resel 0x14 */ +#define ESP_SSTEP 0x06UL /* ro Sequence step register 0x18 */ +#define ESP_STP ESP_SSTEP /* wo Transfer period/sync 0x18 */ +#define ESP_FFLAGS 0x07UL /* ro Bits current FIFO info 0x1c */ +#define ESP_SOFF ESP_FFLAGS /* wo Sync offset 0x1c */ +#define ESP_CFG1 0x08UL /* rw First cfg register 0x20 */ +#define ESP_CFACT 0x09UL /* wo Clock conv factor 0x24 */ +#define ESP_STATUS2 ESP_CFACT /* ro HME status2 register 0x24 */ +#define ESP_CTEST 0x0aUL /* wo Chip test register 0x28 */ +#define ESP_CFG2 0x0bUL /* rw Second cfg register 0x2c */ +#define ESP_CFG3 0x0cUL /* rw Third cfg register 0x30 */ +#define ESP_TCHI 0x0eUL /* rw High bits transf count 0x38 */ +#define ESP_UID ESP_TCHI /* ro Unique ID code 0x38 */ +#define FAS_RLO ESP_TCHI /* rw HME extended counter 0x38 */ +#define ESP_FGRND 0x0fUL /* rw Data base for fifo 0x3c */ +#define FAS_RHI ESP_FGRND /* rw HME extended counter 0x3c */ + +#define SBUS_ESP_REG_SIZE 0x40UL + +/* Bitfield meanings for the above registers. */ + +/* ESP config reg 1, read-write, found on all ESP chips */ +#define ESP_CONFIG1_ID 0x07 /* My BUS ID bits */ +#define ESP_CONFIG1_CHTEST 0x08 /* Enable ESP chip tests */ +#define ESP_CONFIG1_PENABLE 0x10 /* Enable parity checks */ +#define ESP_CONFIG1_PARTEST 0x20 /* Parity test mode enabled? */ +#define ESP_CONFIG1_SRRDISAB 0x40 /* Disable SCSI reset reports */ +#define ESP_CONFIG1_SLCABLE 0x80 /* Enable slow cable mode */ + +/* ESP config reg 2, read-write, found only on esp100a+esp200+esp236 chips */ +#define ESP_CONFIG2_DMAPARITY 0x01 /* enable DMA Parity (200,236) */ +#define ESP_CONFIG2_REGPARITY 0x02 /* enable reg Parity (200,236) */ +#define ESP_CONFIG2_BADPARITY 0x04 /* Bad parity target abort */ +#define ESP_CONFIG2_SCSI2ENAB 0x08 /* Enable SCSI-2 features (tgtmode) */ +#define ESP_CONFIG2_HI 0x10 /* High Impedance DREQ ???
*/ +#define ESP_CONFIG2_HMEFENAB 0x10 /* HME features enable */ +#define ESP_CONFIG2_BCM 0x20 /* Enable byte-ctrl (236) */ +#define ESP_CONFIG2_DISPINT 0x20 /* Disable pause irq (hme) */ +#define ESP_CONFIG2_FENAB 0x40 /* Enable features (fas100,216) */ +#define ESP_CONFIG2_SPL 0x40 /* Enable status-phase latch (236) */ +#define ESP_CONFIG2_MKDONE 0x40 /* HME magic feature */ +#define ESP_CONFIG2_HME32 0x80 /* HME 32 extended */ +#define ESP_CONFIG2_MAGIC 0xe0 /* Invalid bits... */ + +/* ESP config register 3 read-write, found only esp236+fas236+fas100a+hme chips */ +#define ESP_CONFIG3_FCLOCK 0x01 /* FAST SCSI clock rate (esp100a/hme) */ +#define ESP_CONFIG3_TEM 0x01 /* Enable thresh-8 mode (esp/fas236) */ +#define ESP_CONFIG3_FAST 0x02 /* Enable FAST SCSI (esp100a/hme) */ +#define ESP_CONFIG3_ADMA 0x02 /* Enable alternate-dma (esp/fas236) */ +#define ESP_CONFIG3_TENB 0x04 /* group2 SCSI2 support (esp100a/hme) */ +#define ESP_CONFIG3_SRB 0x04 /* Save residual byte (esp/fas236) */ +#define ESP_CONFIG3_TMS 0x08 /* Three-byte msg's ok (esp100a/hme) */ +#define ESP_CONFIG3_FCLK 0x08 /* Fast SCSI clock rate (esp/fas236) */ +#define ESP_CONFIG3_IDMSG 0x10 /* ID message checking (esp100a/hme) */ +#define ESP_CONFIG3_FSCSI 0x10 /* Enable FAST SCSI (esp/fas236) */ +#define ESP_CONFIG3_GTM 0x20 /* group2 SCSI2 support (esp/fas236) */ +#define ESP_CONFIG3_IDBIT3 0x20 /* Bit 3 of HME SCSI-ID (hme) */ +#define ESP_CONFIG3_TBMS 0x40 /* Three-byte msg's ok (esp/fas236) */ +#define ESP_CONFIG3_EWIDE 0x40 /* Enable Wide-SCSI (hme) */ +#define ESP_CONFIG3_IMS 0x80 /* ID msg chk'ng (esp/fas236) */ +#define ESP_CONFIG3_OBPUSH 0x80 /* Push odd-byte to dma (hme) */ + +/* ESP command register read-write */ +/* Group 1 commands: These may be sent at any point in time to the ESP + * chip. None of them can generate interrupts 'cept + * the "SCSI bus reset" command if you have not disabled + * SCSI reset interrupts in the config1 ESP register. + */ +#define ESP_CMD_NULL 0x00 /* Null command, ie. a nop */ +#define ESP_CMD_FLUSH 0x01 /* FIFO Flush */ +#define ESP_CMD_RC 0x02 /* Chip reset */ +#define ESP_CMD_RS 0x03 /* SCSI bus reset */ + +/* Group 2 commands: ESP must be an initiator and connected to a target + * for these commands to work. + */ +#define ESP_CMD_TI 0x10 /* Transfer Information */ +#define ESP_CMD_ICCSEQ 0x11 /* Initiator cmd complete sequence */ +#define ESP_CMD_MOK 0x12 /* Message okie-dokie */ +#define ESP_CMD_TPAD 0x18 /* Transfer Pad */ +#define ESP_CMD_SATN 0x1a /* Set ATN */ +#define ESP_CMD_RATN 0x1b /* De-assert ATN */ + +/* Group 3 commands: ESP must be in the MSGOUT or MSGIN state and be connected + * to a target as the initiator for these commands to work. + */ +#define ESP_CMD_SMSG 0x20 /* Send message */ +#define ESP_CMD_SSTAT 0x21 /* Send status */ +#define ESP_CMD_SDATA 0x22 /* Send data */ +#define ESP_CMD_DSEQ 0x23 /* Discontinue Sequence */ +#define ESP_CMD_TSEQ 0x24 /* Terminate Sequence */ +#define ESP_CMD_TCCSEQ 0x25 /* Target cmd cmplt sequence */ +#define ESP_CMD_DCNCT 0x27 /* Disconnect */ +#define ESP_CMD_RMSG 0x28 /* Receive Message */ +#define ESP_CMD_RCMD 0x29 /* Receive Command */ +#define ESP_CMD_RDATA 0x2a /* Receive Data */ +#define ESP_CMD_RCSEQ 0x2b /* Receive cmd sequence */ + +/* Group 4 commands: The ESP must be in the disconnected state and must + * not be connected to any targets as initiator for + * these commands to work. 
+ */ +#define ESP_CMD_RSEL 0x40 /* Reselect */ +#define ESP_CMD_SEL 0x41 /* Select w/o ATN */ +#define ESP_CMD_SELA 0x42 /* Select w/ATN */ +#define ESP_CMD_SELAS 0x43 /* Select w/ATN & STOP */ +#define ESP_CMD_ESEL 0x44 /* Enable selection */ +#define ESP_CMD_DSEL 0x45 /* Disable selections */ +#define ESP_CMD_SA3 0x46 /* Select w/ATN3 */ +#define ESP_CMD_RSEL3 0x47 /* Reselect3 */ + +/* This bit enables the ESP's DMA on the SBus */ +#define ESP_CMD_DMA 0x80 /* Do DMA? */ + +/* ESP status register read-only */ +#define ESP_STAT_PIO 0x01 /* IO phase bit */ +#define ESP_STAT_PCD 0x02 /* CD phase bit */ +#define ESP_STAT_PMSG 0x04 /* MSG phase bit */ +#define ESP_STAT_PMASK 0x07 /* Mask of phase bits */ +#define ESP_STAT_TDONE 0x08 /* Transfer Completed */ +#define ESP_STAT_TCNT 0x10 /* Transfer Counter Is Zero */ +#define ESP_STAT_PERR 0x20 /* Parity error */ +#define ESP_STAT_SPAM 0x40 /* Real bad error */ +/* This indicates the 'interrupt pending' condition on esp236, it is a reserved + * bit on other revs of the ESP. + */ +#define ESP_STAT_INTR 0x80 /* Interrupt */ + +/* The status register can be masked with ESP_STAT_PMASK and compared + * with the following values to determine the current phase the ESP + * (at least thinks it) is in. For our purposes we also add our own + * software 'done' bit for our phase management engine. + */ +#define ESP_DOP (0) /* Data Out */ +#define ESP_DIP (ESP_STAT_PIO) /* Data In */ +#define ESP_CMDP (ESP_STAT_PCD) /* Command */ +#define ESP_STATP (ESP_STAT_PCD|ESP_STAT_PIO) /* Status */ +#define ESP_MOP (ESP_STAT_PMSG|ESP_STAT_PCD) /* Message Out */ +#define ESP_MIP (ESP_STAT_PMSG|ESP_STAT_PCD|ESP_STAT_PIO) /* Message In */ + +/* HME only: status 2 register */ +#define ESP_STAT2_SCHBIT 0x01 /* Upper bits 3-7 of sstep enabled */ +#define ESP_STAT2_FFLAGS 0x02 /* The fifo flags are now latched */ +#define ESP_STAT2_XCNT 0x04 /* The transfer counter is latched */ +#define ESP_STAT2_CREGA 0x08 /* The command reg is active now */ +#define ESP_STAT2_WIDE 0x10 /* Interface on this adapter is wide */ +#define ESP_STAT2_F1BYTE 0x20 /* There is one byte at top of fifo */ +#define ESP_STAT2_FMSB 0x40 /* Next byte in fifo is most significant */ +#define ESP_STAT2_FEMPTY 0x80 /* FIFO is empty */ + +/* ESP interrupt register read-only */ +#define ESP_INTR_S 0x01 /* Select w/o ATN */ +#define ESP_INTR_SATN 0x02 /* Select w/ATN */ +#define ESP_INTR_RSEL 0x04 /* Reselected */ +#define ESP_INTR_FDONE 0x08 /* Function done */ +#define ESP_INTR_BSERV 0x10 /* Bus service */ +#define ESP_INTR_DC 0x20 /* Disconnect */ +#define ESP_INTR_IC 0x40 /* Illegal command given */ +#define ESP_INTR_SR 0x80 /* SCSI bus reset detected */ + +/* ESP sequence step register read-only */ +#define ESP_STEP_VBITS 0x07 /* Valid bits */ +#define ESP_STEP_ASEL 0x00 /* Selection&Arbitrate cmplt */ +#define ESP_STEP_SID 0x01 /* One msg byte sent */ +#define ESP_STEP_NCMD 0x02 /* Was not in command phase */ +#define ESP_STEP_PPC 0x03 /* Early phase chg caused cmnd + * bytes to be lost + */ +#define ESP_STEP_FINI4 0x04 /* Command was sent ok */ + +/* Ho hum, some ESP's set the step register to this as well... 
*/ +#define ESP_STEP_FINI5 0x05 +#define ESP_STEP_FINI6 0x06 +#define ESP_STEP_FINI7 0x07 + +/* ESP chip-test register read-write */ +#define ESP_TEST_TARG 0x01 /* Target test mode */ +#define ESP_TEST_INI 0x02 /* Initiator test mode */ +#define ESP_TEST_TS 0x04 /* Tristate test mode */ + +/* ESP unique ID register read-only, found on fas236+fas100a only */ +#define ESP_UID_F100A 0x00 /* ESP FAS100A */ +#define ESP_UID_F236 0x02 /* ESP FAS236 */ +#define ESP_UID_REV 0x07 /* ESP revision */ +#define ESP_UID_FAM 0xf8 /* ESP family */ + +/* ESP fifo flags register read-only */ +/* Note that the following implies a 16 byte FIFO on the ESP. */ +#define ESP_FF_FBYTES 0x1f /* Num bytes in FIFO */ +#define ESP_FF_ONOTZERO 0x20 /* offset ctr not zero (esp100) */ +#define ESP_FF_SSTEP 0xe0 /* Sequence step */ + +/* ESP clock conversion factor register write-only */ +#define ESP_CCF_F0 0x00 /* 35.01MHz - 40MHz */ +#define ESP_CCF_NEVER 0x01 /* Set it to this and die */ +#define ESP_CCF_F2 0x02 /* 10MHz */ +#define ESP_CCF_F3 0x03 /* 10.01MHz - 15MHz */ +#define ESP_CCF_F4 0x04 /* 15.01MHz - 20MHz */ +#define ESP_CCF_F5 0x05 /* 20.01MHz - 25MHz */ +#define ESP_CCF_F6 0x06 /* 25.01MHz - 30MHz */ +#define ESP_CCF_F7 0x07 /* 30.01MHz - 35MHz */ + +/* HME only... */ +#define ESP_BUSID_RESELID 0x10 +#define ESP_BUSID_CTR32BIT 0x40 + +#define ESP_BUS_TIMEOUT 250 /* In milli-seconds */ +#define ESP_TIMEO_CONST 8192 +#define ESP_NEG_DEFP(mhz, cfact) \ + ((ESP_BUS_TIMEOUT * ((mhz) / 1000)) / (8192 * (cfact))) +#define ESP_MHZ_TO_CYCLE(mhertz) ((1000000000) / ((mhertz) / 1000)) +#define ESP_TICK(ccf, cycle) ((7682 * (ccf) * (cycle) / 1000)) + +/* For slow to medium speed input clock rates we shoot for 5mb/s, but for high + * input clock rates we try to do 10mb/s although I don't think a transfer can + * even run that fast with an ESP even with DMA2 scatter gather pipelining. + */ +#define SYNC_DEFP_SLOW 0x32 /* 5mb/s */ +#define SYNC_DEFP_FAST 0x19 /* 10mb/s */ + +struct esp_cmd_priv { + union { + dma_addr_t dma_addr; + int num_sg; + } u; + + unsigned int cur_residue; + struct scatterlist *cur_sg; + unsigned int tot_residue; +}; +#define ESP_CMD_PRIV(CMD) ((struct esp_cmd_priv *)(&(CMD)->SCp)) + +enum esp_rev { + ESP100 = 0x00, /* NCR53C90 - very broken */ + ESP100A = 0x01, /* NCR53C90A */ + ESP236 = 0x02, + FAS236 = 0x03, + FAS100A = 0x04, + FAST = 0x05, + FASHME = 0x06, +}; + +struct esp_cmd_entry { + struct list_head list; + + struct scsi_cmnd *cmd; + + unsigned int saved_cur_residue; + struct scatterlist *saved_cur_sg; + unsigned int saved_tot_residue; + + u8 flags; +#define ESP_CMD_FLAG_WRITE 0x01 /* DMA is a write */ +#define ESP_CMD_FLAG_ABORT 0x02 /* being aborted */ +#define ESP_CMD_FLAG_AUTOSENSE 0x04 /* Doing automatic REQUEST_SENSE */ + + u8 tag[2]; + + u8 status; + u8 message; + + unsigned char *sense_ptr; + unsigned char *saved_sense_ptr; + dma_addr_t sense_dma; + + struct completion *eh_done; +}; + +/* XXX make this configurable somehow XXX */ +#define ESP_DEFAULT_TAGS 16 + +#define ESP_MAX_TARGET 16 +#define ESP_MAX_LUN 8 +#define ESP_MAX_TAG 256 + +struct esp_lun_data { + struct esp_cmd_entry *non_tagged_cmd; + int num_tagged; + int hold; + struct esp_cmd_entry *tagged_cmds[ESP_MAX_TAG]; +}; + +struct esp_target_data { + /* These are the ESP_STP, ESP_SOFF, and ESP_CFG3 register values which + * match the currently negotiated settings for this target. The SCSI + * protocol values are maintained in spi_{offset,period,wide}(starget). 
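+	 *
+	 * (Worked example: with a 20MHz clock, one_clock in
+	 * esp_msgin_sdtr() is ccycle / 1000 = 50ns.  An SDTR period
+	 * byte of 50 requests a 200ns period (period << 2), so
+	 * esp_period/ESP_STP is programmed with the rounded-up
+	 * 200 / 50 = 4 clocks per transfer period -- consistent with
+	 * the 5mb/s SYNC_DEFP_SLOW default of 0x32.)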
+ */ + u8 esp_period; + u8 esp_offset; + u8 esp_config3; + + u8 flags; +#define ESP_TGT_WIDE 0x01 +#define ESP_TGT_DISCONNECT 0x02 +#define ESP_TGT_NEGO_WIDE 0x04 +#define ESP_TGT_NEGO_SYNC 0x08 +#define ESP_TGT_CHECK_NEGO 0x40 +#define ESP_TGT_BROKEN 0x80 + + /* When ESP_TGT_CHECK_NEGO is set, on the next scsi command to this + * device we will try to negotiate the following parameters. + */ + u8 nego_goal_period; + u8 nego_goal_offset; + u8 nego_goal_width; + u8 nego_goal_tags; + + struct scsi_target *starget; +}; + +struct esp_event_ent { + u8 type; +#define ESP_EVENT_TYPE_EVENT 0x01 +#define ESP_EVENT_TYPE_CMD 0x02 + u8 val; + + u8 sreg; + u8 seqreg; + u8 sreg2; + u8 ireg; + u8 select_state; + u8 event; + u8 __pad; +}; + +struct esp; +struct esp_driver_ops { + /* Read and write the ESP 8-bit registers. On some + * applications of the ESP chip the registers are at 4-byte + * instead of 1-byte intervals. + */ + void (*esp_write8)(struct esp *esp, u8 val, unsigned long reg); + u8 (*esp_read8)(struct esp *esp, unsigned long reg); + + /* Map and unmap DMA memory. Eventually the driver will be + * converted to the generic DMA API as soon as SBUS is able to + * cope with that. At such time we can remove this. + */ + dma_addr_t (*map_single)(struct esp *esp, void *buf, + size_t sz, int dir); + int (*map_sg)(struct esp *esp, struct scatterlist *sg, + int num_sg, int dir); + void (*unmap_single)(struct esp *esp, dma_addr_t addr, + size_t sz, int dir); + void (*unmap_sg)(struct esp *esp, struct scatterlist *sg, + int num_sg, int dir); + + /* Return non-zero if there is an IRQ pending. Usually this + * status bit lives in the DMA controller sitting in front of + * the ESP. This has to be accurate or else the ESP interrupt + * handler will not run. + */ + int (*irq_pending)(struct esp *esp); + + /* Reset the DMA engine entirely. On return, ESP interrupts + * should be enabled. Often the interrupt enabling is + * controlled in the DMA engine. + */ + void (*reset_dma)(struct esp *esp); + + /* Drain any pending DMA in the DMA engine after a transfer. + * This is for writes to memory. + */ + void (*dma_drain)(struct esp *esp); + + /* Invalidate the DMA engine after a DMA transfer. */ + void (*dma_invalidate)(struct esp *esp); + + /* Setup an ESP command that will use a DMA transfer. + * The 'esp_count' specifies what transfer length should be + * programmed into the ESP transfer counter registers, whereas + * the 'dma_count' is the length that should be programmed into + * the DMA controller. Usually they are the same. If 'write' + * is non-zero, this transfer is a write into memory. 'cmd' + * holds the ESP command that should be issued by calling + * scsi_esp_cmd() at the appropriate time while programming + * the DMA hardware. + */ + void (*send_dma_cmd)(struct esp *esp, u32 dma_addr, u32 esp_count, + u32 dma_count, int write, u8 cmd); + + /* Return non-zero if the DMA engine is reporting an error + * currently. + */ + int (*dma_error)(struct esp *esp); +}; + +#define ESP_MAX_MSG_SZ 8 +#define ESP_EVENT_LOG_SZ 32 + +#define ESP_QUICKIRQ_LIMIT 100 +#define ESP_RESELECT_TAG_LIMIT 2500 + +struct esp { + void __iomem *regs; + void __iomem *dma_regs; + + const struct esp_driver_ops *ops; + + struct Scsi_Host *host; + void *dev; + + struct esp_cmd_entry *active_cmd; + + struct list_head queued_cmds; + struct list_head active_cmds; + + u8 *command_block; + dma_addr_t command_block_dma; + + unsigned int data_dma_len; + + /* The following are used to determine the cause of an IRQ. 
Upon every + * IRQ entry we synchronize these with the hardware registers. + */ + u8 sreg; + u8 seqreg; + u8 sreg2; + u8 ireg; + + u32 prev_hme_dmacsr; + u8 prev_soff; + u8 prev_stp; + u8 prev_cfg3; + u8 __pad; + + struct list_head esp_cmd_pool; + + struct esp_target_data target[ESP_MAX_TARGET]; + + int fifo_cnt; + u8 fifo[16]; + + struct esp_event_ent esp_event_log[ESP_EVENT_LOG_SZ]; + int esp_event_cur; + + u8 msg_out[ESP_MAX_MSG_SZ]; + int msg_out_len; + + u8 msg_in[ESP_MAX_MSG_SZ]; + int msg_in_len; + + u8 bursts; + u8 config1; + u8 config2; + + u8 scsi_id; + u32 scsi_id_mask; + + enum esp_rev rev; + + u32 flags; +#define ESP_FLAG_DIFFERENTIAL 0x00000001 +#define ESP_FLAG_RESETTING 0x00000002 +#define ESP_FLAG_DOING_SLOWCMD 0x00000004 +#define ESP_FLAG_WIDE_CAPABLE 0x00000008 +#define ESP_FLAG_QUICKIRQ_CHECK 0x00000010 + + u8 select_state; +#define ESP_SELECT_NONE 0x00 /* Not selecting */ +#define ESP_SELECT_BASIC 0x01 /* Select w/o MSGOUT phase */ +#define ESP_SELECT_MSGOUT 0x02 /* Select with MSGOUT */ + + /* When we are not selecting, we are expecting an event. */ + u8 event; +#define ESP_EVENT_NONE 0x00 +#define ESP_EVENT_CMD_START 0x01 +#define ESP_EVENT_CMD_DONE 0x02 +#define ESP_EVENT_DATA_IN 0x03 +#define ESP_EVENT_DATA_OUT 0x04 +#define ESP_EVENT_DATA_DONE 0x05 +#define ESP_EVENT_MSGIN 0x06 +#define ESP_EVENT_MSGIN_MORE 0x07 +#define ESP_EVENT_MSGIN_DONE 0x08 +#define ESP_EVENT_MSGOUT 0x09 +#define ESP_EVENT_MSGOUT_DONE 0x0a +#define ESP_EVENT_STATUS 0x0b +#define ESP_EVENT_FREE_BUS 0x0c +#define ESP_EVENT_CHECK_PHASE 0x0d +#define ESP_EVENT_RESET 0x10 + + /* Probed in esp_get_clock_params() */ + u32 cfact; + u32 cfreq; + u32 ccycle; + u32 ctick; + u32 neg_defp; + u32 sync_defp; + + /* Computed in esp_reset_esp() */ + u32 max_period; + u32 min_period; + u32 radelay; + + /* Slow command state. */ + u8 *cmd_bytes_ptr; + int cmd_bytes_left; + + struct completion *eh_reset; + + struct sbus_dma *dma; +}; + +#define host_to_esp(host) ((struct esp *)(host)->hostdata) + +/* A front-end driver for the ESP chip should do the following in + * its device probe routine: + * 1) Allocate the host and private area using scsi_host_alloc() + * with size 'sizeof(struct esp)'. The first argument to + * scsi_host_alloc() should be &scsi_esp_template. + * 2) Set host->max_id as appropriate. + * 3) Set esp->host to the scsi_host itself, and esp->dev + * to the device object pointer. + * 4) Hook up esp->ops to the front-end implementation. + * 5) If the ESP chip supports wide transfers, set ESP_FLAG_WIDE_CAPABLE + * in esp->flags. + * 6) Map the DMA and ESP chip registers. + * 7) DMA map the ESP command block, store the DMA address + * in esp->command_block_dma. + * 8) Register the scsi_esp_intr() interrupt handler. + * 9) Probe for and provide the following chip properties: + * esp->scsi_id (assign to esp->host->this_id too) + * esp->scsi_id_mask + * If ESP bus is differential, set ESP_FLAG_DIFFERENTIAL + * esp->cfreq + * DMA burst bit mask in esp->bursts, if necessary + * 10) Perform any actions necessary before the ESP device can + * be programmed for the first time. On some configs, for + * example, the DMA engine has to be reset before ESP can + * be programmed. + * 11) If necessary, call dev_set_drvdata() as needed. + * 12) Call scsi_esp_register() with prepared 'esp' structure + * and a device pointer if possible. + * 13) Check scsi_esp_register() return value, release all resources + * if an error was returned.
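+ *
+ * A skeletal probe following the steps above might look like the
+ * sketch below.  It is illustrative only: the my_* bus glue and
+ * my_esp_ops names are hypothetical stand-ins, and steps 5, 10 and
+ * 11 are elided.
+ *
+ *	static int __devinit my_esp_probe(struct my_bus_dev *bdev)
+ *	{
+ *		struct Scsi_Host *host;
+ *		struct esp *esp;
+ *		int err;
+ *
+ *		host = scsi_host_alloc(&scsi_esp_template,	// 1
+ *				       sizeof(struct esp));
+ *		if (!host)
+ *			return -ENOMEM;
+ *		host->max_id = 8;				// 2
+ *
+ *		esp = host_to_esp(host);
+ *		esp->host = host;				// 3
+ *		esp->dev = bdev;
+ *		esp->ops = &my_esp_ops;				// 4
+ *
+ *		esp->regs = my_ioremap(bdev, SBUS_ESP_REG_SIZE);// 6
+ *		esp->command_block =				// 7
+ *			my_alloc_coherent(bdev, 16,
+ *					  &esp->command_block_dma);
+ *
+ *		err = request_irq(bdev->irq, scsi_esp_intr,	// 8
+ *				  IRQF_SHARED, "ESP", esp);
+ *		if (err)
+ *			goto fail_unmap;
+ *
+ *		esp->scsi_id = my_initiator_id(bdev);		// 9
+ *		host->this_id = esp->scsi_id;
+ *		esp->scsi_id_mask = 1 << esp->scsi_id;
+ *		esp->cfreq = my_clock_frequency(bdev);
+ *
+ *		err = scsi_esp_register(esp, &bdev->dev);	// 12
+ *		if (err)
+ *			goto fail_free_irq;			// 13
+ *		return 0;
+ *
+ *	fail_free_irq:
+ *		free_irq(bdev->irq, esp);
+ *	fail_unmap:
+ *		my_release(bdev);
+ *		scsi_host_put(host);
+ *		return err;
+ *	}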
+ */ +extern struct scsi_host_template scsi_esp_template; +extern int scsi_esp_register(struct esp *, struct device *); + +extern void scsi_esp_unregister(struct esp *); +extern irqreturn_t scsi_esp_intr(int, void *); +extern void scsi_esp_cmd(struct esp *, u8); + +#endif /* !(_ESP_SCSI_H) */ diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index 9f10689905a..c4195ea869e 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1403,7 +1403,7 @@ static int __devinit qpti_sbus_probe(struct of_device *dev, const struct of_devi struct scsi_host_template *tpnt = match->data; struct Scsi_Host *host; struct qlogicpti *qpti; - char *fcode; + const char *fcode; /* Sometimes Antares cards come up not completely * setup, and we get a report of a zero IRQ. diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c index 1b59b27e887..4bf9aa547c7 100644 --- a/drivers/scsi/scsi_netlink.c +++ b/drivers/scsi/scsi_netlink.c @@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb) while (skb->len >= NLMSG_SPACE(0)) { err = 0; - nlh = (struct nlmsghdr *) skb->data; + nlh = nlmsg_hdr(skb); if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) || (skb->len < nlh->nlmsg_len)) { printk(KERN_WARNING "%s: discarding partial skb\n", @@ -168,7 +168,8 @@ scsi_netlink_init(void) } scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, - SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE); + SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, + THIS_MODULE); if (!scsi_nl_sock) { printk(KERN_ERR "%s: register of recieve handler failed\n", __FUNCTION__); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index ce0d14af33c..aabaa0576ab 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1081,7 +1081,7 @@ iscsi_if_rx(struct sock *sk, int len) struct nlmsghdr *nlh; struct iscsi_uevent *ev; - nlh = (struct nlmsghdr *)skb->data; + nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) { break; @@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, + nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL, THIS_MODULE); if (!nls) { err = -ENOBUFS; diff --git a/drivers/scsi/sun_esp.c b/drivers/scsi/sun_esp.c new file mode 100644 index 00000000000..8c766bcd109 --- /dev/null +++ b/drivers/scsi/sun_esp.c @@ -0,0 +1,634 @@ +/* sun_esp.c: ESP front-end for Sparc SBUS systems. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <asm/irq.h> +#include <asm/io.h> +#include <asm/dma.h> + +#include <asm/sbus.h> + +#include <scsi/scsi_host.h> + +#include "esp_scsi.h" + +#define DRV_MODULE_NAME "sun_esp" +#define PFX DRV_MODULE_NAME ": " +#define DRV_VERSION "1.000" +#define DRV_MODULE_RELDATE "April 19, 2007" + +#define dma_read32(REG) \ + sbus_readl(esp->dma_regs + (REG)) +#define dma_write32(VAL, REG) \ + sbus_writel((VAL), esp->dma_regs + (REG)) + +static int __devinit esp_sbus_find_dma(struct esp *esp, struct sbus_dev *dma_sdev) +{ + struct sbus_dev *sdev = esp->dev; + struct sbus_dma *dma; + + if (dma_sdev != NULL) { + for_each_dvma(dma) { + if (dma->sdev == dma_sdev) + break; + } + } else { + for_each_dvma(dma) { + if (dma->sdev == NULL) + break; + + /* If bus + slot are the same and it has the + * correct OBP name, it's ours. 
+ */ + if (sdev->bus == dma->sdev->bus && + sdev->slot == dma->sdev->slot && + (!strcmp(dma->sdev->prom_name, "dma") || + !strcmp(dma->sdev->prom_name, "espdma"))) + break; + } + } + + if (dma == NULL) { + printk(KERN_ERR PFX "[%s] Cannot find dma.\n", + sdev->ofdev.node->full_name); + return -ENODEV; + } + esp->dma = dma; + esp->dma_regs = dma->regs; + + return 0; + +} + +static int __devinit esp_sbus_map_regs(struct esp *esp, int hme) +{ + struct sbus_dev *sdev = esp->dev; + struct resource *res; + + /* On HME, two reg sets exist, first is DVMA, + * second is ESP registers. + */ + if (hme) + res = &sdev->resource[1]; + else + res = &sdev->resource[0]; + + esp->regs = sbus_ioremap(res, 0, SBUS_ESP_REG_SIZE, "ESP"); + if (!esp->regs) + return -ENOMEM; + + return 0; +} + +static int __devinit esp_sbus_map_command_block(struct esp *esp) +{ + struct sbus_dev *sdev = esp->dev; + + esp->command_block = sbus_alloc_consistent(sdev, 16, + &esp->command_block_dma); + if (!esp->command_block) + return -ENOMEM; + return 0; +} + +static int __devinit esp_sbus_register_irq(struct esp *esp) +{ + struct Scsi_Host *host = esp->host; + struct sbus_dev *sdev = esp->dev; + + host->irq = sdev->irqs[0]; + return request_irq(host->irq, scsi_esp_intr, IRQF_SHARED, "ESP", esp); +} + +static void __devinit esp_get_scsi_id(struct esp *esp) +{ + struct sbus_dev *sdev = esp->dev; + struct device_node *dp = sdev->ofdev.node; + + esp->scsi_id = of_getintprop_default(dp, "initiator-id", 0xff); + if (esp->scsi_id != 0xff) + goto done; + + esp->scsi_id = of_getintprop_default(dp, "scsi-initiator-id", 0xff); + if (esp->scsi_id != 0xff) + goto done; + + if (!sdev->bus) { + /* SUN4 */ + esp->scsi_id = 7; + goto done; + } + + esp->scsi_id = of_getintprop_default(sdev->bus->ofdev.node, + "scsi-initiator-id", 7); + +done: + esp->host->this_id = esp->scsi_id; + esp->scsi_id_mask = (1 << esp->scsi_id); +} + +static void __devinit esp_get_differential(struct esp *esp) +{ + struct sbus_dev *sdev = esp->dev; + struct device_node *dp = sdev->ofdev.node; + + if (of_find_property(dp, "differential", NULL)) + esp->flags |= ESP_FLAG_DIFFERENTIAL; + else + esp->flags &= ~ESP_FLAG_DIFFERENTIAL; +} + +static void __devinit esp_get_clock_params(struct esp *esp) +{ + struct sbus_dev *sdev = esp->dev; + struct device_node *dp = sdev->ofdev.node; + struct device_node *bus_dp; + int fmhz; + + bus_dp = NULL; + if (sdev != NULL && sdev->bus != NULL) + bus_dp = sdev->bus->ofdev.node; + + fmhz = of_getintprop_default(dp, "clock-frequency", 0); + if (fmhz == 0) + fmhz = (!bus_dp) ? 
0 : + of_getintprop_default(bus_dp, "clock-frequency", 0); + + esp->cfreq = fmhz; +} + +static void __devinit esp_get_bursts(struct esp *esp, struct sbus_dev *dma) +{ + struct sbus_dev *sdev = esp->dev; + struct device_node *dp = sdev->ofdev.node; + u8 bursts; + + bursts = of_getintprop_default(dp, "burst-sizes", 0xff); + if (dma) { + struct device_node *dma_dp = dma->ofdev.node; + u8 val = of_getintprop_default(dma_dp, "burst-sizes", 0xff); + if (val != 0xff) + bursts &= val; + } + + if (sdev->bus) { + u8 val = of_getintprop_default(sdev->bus->ofdev.node, + "burst-sizes", 0xff); + if (val != 0xff) + bursts &= val; + } + + if (bursts == 0xff || + (bursts & DMA_BURST16) == 0 || + (bursts & DMA_BURST32) == 0) + bursts = (DMA_BURST32 - 1); + + esp->bursts = bursts; +} + +static void __devinit esp_sbus_get_props(struct esp *esp, struct sbus_dev *espdma) +{ + esp_get_scsi_id(esp); + esp_get_differential(esp); + esp_get_clock_params(esp); + esp_get_bursts(esp, espdma); +} + +static void sbus_esp_write8(struct esp *esp, u8 val, unsigned long reg) +{ + sbus_writeb(val, esp->regs + (reg * 4UL)); +} + +static u8 sbus_esp_read8(struct esp *esp, unsigned long reg) +{ + return sbus_readb(esp->regs + (reg * 4UL)); +} + +static dma_addr_t sbus_esp_map_single(struct esp *esp, void *buf, + size_t sz, int dir) +{ + return sbus_map_single(esp->dev, buf, sz, dir); +} + +static int sbus_esp_map_sg(struct esp *esp, struct scatterlist *sg, + int num_sg, int dir) +{ + return sbus_map_sg(esp->dev, sg, num_sg, dir); +} + +static void sbus_esp_unmap_single(struct esp *esp, dma_addr_t addr, + size_t sz, int dir) +{ + sbus_unmap_single(esp->dev, addr, sz, dir); +} + +static void sbus_esp_unmap_sg(struct esp *esp, struct scatterlist *sg, + int num_sg, int dir) +{ + sbus_unmap_sg(esp->dev, sg, num_sg, dir); +} + +static int sbus_esp_irq_pending(struct esp *esp) +{ + if (dma_read32(DMA_CSR) & (DMA_HNDL_INTR | DMA_HNDL_ERROR)) + return 1; + return 0; +} + +static void sbus_esp_reset_dma(struct esp *esp) +{ + int can_do_burst16, can_do_burst32, can_do_burst64; + int can_do_sbus64, lim; + u32 val; + + can_do_burst16 = (esp->bursts & DMA_BURST16) != 0; + can_do_burst32 = (esp->bursts & DMA_BURST32) != 0; + can_do_burst64 = 0; + can_do_sbus64 = 0; + if (sbus_can_dma_64bit(esp->dev)) + can_do_sbus64 = 1; + if (sbus_can_burst64(esp->dev)) + can_do_burst64 = (esp->bursts & DMA_BURST64) != 0; + + /* Put the DVMA into a known state. 
*/ + if (esp->dma->revision != dvmahme) { + val = dma_read32(DMA_CSR); + dma_write32(val | DMA_RST_SCSI, DMA_CSR); + dma_write32(val & ~DMA_RST_SCSI, DMA_CSR); + } + switch (esp->dma->revision) { + case dvmahme: + dma_write32(DMA_RESET_FAS366, DMA_CSR); + dma_write32(DMA_RST_SCSI, DMA_CSR); + + esp->prev_hme_dmacsr = (DMA_PARITY_OFF | DMA_2CLKS | + DMA_SCSI_DISAB | DMA_INT_ENAB); + + esp->prev_hme_dmacsr &= ~(DMA_ENABLE | DMA_ST_WRITE | + DMA_BRST_SZ); + + if (can_do_burst64) + esp->prev_hme_dmacsr |= DMA_BRST64; + else if (can_do_burst32) + esp->prev_hme_dmacsr |= DMA_BRST32; + + if (can_do_sbus64) { + esp->prev_hme_dmacsr |= DMA_SCSI_SBUS64; + sbus_set_sbus64(esp->dev, esp->bursts); + } + + lim = 1000; + while (dma_read32(DMA_CSR) & DMA_PEND_READ) { + if (--lim == 0) { + printk(KERN_ALERT PFX "esp%d: DMA_PEND_READ " + "will not clear!\n", + esp->host->unique_id); + break; + } + udelay(1); + } + + dma_write32(0, DMA_CSR); + dma_write32(esp->prev_hme_dmacsr, DMA_CSR); + + dma_write32(0, DMA_ADDR); + break; + + case dvmarev2: + if (esp->rev != ESP100) { + val = dma_read32(DMA_CSR); + dma_write32(val | DMA_3CLKS, DMA_CSR); + } + break; + + case dvmarev3: + val = dma_read32(DMA_CSR); + val &= ~DMA_3CLKS; + val |= DMA_2CLKS; + if (can_do_burst32) { + val &= ~DMA_BRST_SZ; + val |= DMA_BRST32; + } + dma_write32(val, DMA_CSR); + break; + + case dvmaesc1: + val = dma_read32(DMA_CSR); + val |= DMA_ADD_ENABLE; + val &= ~DMA_BCNT_ENAB; + if (!can_do_burst32 && can_do_burst16) { + val |= DMA_ESC_BURST; + } else { + val &= ~(DMA_ESC_BURST); + } + dma_write32(val, DMA_CSR); + break; + + default: + break; + } + + /* Enable interrupts. */ + val = dma_read32(DMA_CSR); + dma_write32(val | DMA_INT_ENAB, DMA_CSR); +} + +static void sbus_esp_dma_drain(struct esp *esp) +{ + u32 csr; + int lim; + + if (esp->dma->revision == dvmahme) + return; + + csr = dma_read32(DMA_CSR); + if (!(csr & DMA_FIFO_ISDRAIN)) + return; + + if (esp->dma->revision != dvmarev3 && esp->dma->revision != dvmaesc1) + dma_write32(csr | DMA_FIFO_STDRAIN, DMA_CSR); + + lim = 1000; + while (dma_read32(DMA_CSR) & DMA_FIFO_ISDRAIN) { + if (--lim == 0) { + printk(KERN_ALERT PFX "esp%d: DMA will not drain!\n", + esp->host->unique_id); + break; + } + udelay(1); + } +} + +static void sbus_esp_dma_invalidate(struct esp *esp) +{ + if (esp->dma->revision == dvmahme) { + dma_write32(DMA_RST_SCSI, DMA_CSR); + + esp->prev_hme_dmacsr = ((esp->prev_hme_dmacsr | + (DMA_PARITY_OFF | DMA_2CLKS | + DMA_SCSI_DISAB | DMA_INT_ENAB)) & + ~(DMA_ST_WRITE | DMA_ENABLE)); + + dma_write32(0, DMA_CSR); + dma_write32(esp->prev_hme_dmacsr, DMA_CSR); + + /* This is necessary to avoid having the SCSI channel + * engine lock up on us. 
+ */ + dma_write32(0, DMA_ADDR); + } else { + u32 val; + int lim; + + lim = 1000; + while ((val = dma_read32(DMA_CSR)) & DMA_PEND_READ) { + if (--lim == 0) { + printk(KERN_ALERT PFX "esp%d: DMA will not " + "invalidate!\n", esp->host->unique_id); + break; + } + udelay(1); + } + + val &= ~(DMA_ENABLE | DMA_ST_WRITE | DMA_BCNT_ENAB); + val |= DMA_FIFO_INV; + dma_write32(val, DMA_CSR); + val &= ~DMA_FIFO_INV; + dma_write32(val, DMA_CSR); + } +} + +static void sbus_esp_send_dma_cmd(struct esp *esp, u32 addr, u32 esp_count, + u32 dma_count, int write, u8 cmd) +{ + u32 csr; + + BUG_ON(!(cmd & ESP_CMD_DMA)); + + sbus_esp_write8(esp, (esp_count >> 0) & 0xff, ESP_TCLOW); + sbus_esp_write8(esp, (esp_count >> 8) & 0xff, ESP_TCMED); + if (esp->rev == FASHME) { + sbus_esp_write8(esp, (esp_count >> 16) & 0xff, FAS_RLO); + sbus_esp_write8(esp, 0, FAS_RHI); + + scsi_esp_cmd(esp, cmd); + + csr = esp->prev_hme_dmacsr; + csr |= DMA_SCSI_DISAB | DMA_ENABLE; + if (write) + csr |= DMA_ST_WRITE; + else + csr &= ~DMA_ST_WRITE; + esp->prev_hme_dmacsr = csr; + + dma_write32(dma_count, DMA_COUNT); + dma_write32(addr, DMA_ADDR); + dma_write32(csr, DMA_CSR); + } else { + csr = dma_read32(DMA_CSR); + csr |= DMA_ENABLE; + if (write) + csr |= DMA_ST_WRITE; + else + csr &= ~DMA_ST_WRITE; + dma_write32(csr, DMA_CSR); + if (esp->dma->revision == dvmaesc1) { + u32 end = PAGE_ALIGN(addr + dma_count + 16U); + dma_write32(end - addr, DMA_COUNT); + } + dma_write32(addr, DMA_ADDR); + + scsi_esp_cmd(esp, cmd); + } + +} + +static int sbus_esp_dma_error(struct esp *esp) +{ + u32 csr = dma_read32(DMA_CSR); + + if (csr & DMA_HNDL_ERROR) + return 1; + + return 0; +} + +static const struct esp_driver_ops sbus_esp_ops = { + .esp_write8 = sbus_esp_write8, + .esp_read8 = sbus_esp_read8, + .map_single = sbus_esp_map_single, + .map_sg = sbus_esp_map_sg, + .unmap_single = sbus_esp_unmap_single, + .unmap_sg = sbus_esp_unmap_sg, + .irq_pending = sbus_esp_irq_pending, + .reset_dma = sbus_esp_reset_dma, + .dma_drain = sbus_esp_dma_drain, + .dma_invalidate = sbus_esp_dma_invalidate, + .send_dma_cmd = sbus_esp_send_dma_cmd, + .dma_error = sbus_esp_dma_error, +}; + +static int __devinit esp_sbus_probe_one(struct device *dev, + struct sbus_dev *esp_dev, + struct sbus_dev *espdma, + struct sbus_bus *sbus, + int hme) +{ + struct scsi_host_template *tpnt = &scsi_esp_template; + struct Scsi_Host *host; + struct esp *esp; + int err; + + host = scsi_host_alloc(tpnt, sizeof(struct esp)); + + err = -ENOMEM; + if (!host) + goto fail; + + host->max_id = (hme ? 16 : 8); + esp = host_to_esp(host); + + esp->host = host; + esp->dev = esp_dev; + esp->ops = &sbus_esp_ops; + + if (hme) + esp->flags |= ESP_FLAG_WIDE_CAPABLE; + + err = esp_sbus_find_dma(esp, espdma); + if (err < 0) + goto fail_unlink; + + err = esp_sbus_map_regs(esp, hme); + if (err < 0) + goto fail_unlink; + + err = esp_sbus_map_command_block(esp); + if (err < 0) + goto fail_unmap_regs; + + err = esp_sbus_register_irq(esp); + if (err < 0) + goto fail_unmap_command_block; + + esp_sbus_get_props(esp, espdma); + + /* Before we try to touch the ESP chip, ESC1 dma can + * come up with the reset bit set, so make sure that + * is clear first. 
+ */ + if (esp->dma->revision == dvmaesc1) { + u32 val = dma_read32(DMA_CSR); + + dma_write32(val & ~DMA_RST_SCSI, DMA_CSR); + } + + dev_set_drvdata(&esp_dev->ofdev.dev, esp); + + err = scsi_esp_register(esp, dev); + if (err) + goto fail_free_irq; + + return 0; + +fail_free_irq: + free_irq(host->irq, esp); +fail_unmap_command_block: + sbus_free_consistent(esp->dev, 16, + esp->command_block, + esp->command_block_dma); +fail_unmap_regs: + sbus_iounmap(esp->regs, SBUS_ESP_REG_SIZE); +fail_unlink: + scsi_host_put(host); +fail: + return err; +} + +static int __devinit esp_sbus_probe(struct of_device *dev, const struct of_device_id *match) +{ + struct sbus_dev *sdev = to_sbus_device(&dev->dev); + struct device_node *dp = dev->node; + struct sbus_dev *dma_sdev = NULL; + int hme = 0; + + if (dp->parent && + (!strcmp(dp->parent->name, "espdma") || + !strcmp(dp->parent->name, "dma"))) + dma_sdev = sdev->parent; + else if (!strcmp(dp->name, "SUNW,fas")) { + dma_sdev = sdev; + hme = 1; + } + + return esp_sbus_probe_one(&dev->dev, sdev, dma_sdev, + sdev->bus, hme); +} + +static int __devexit esp_sbus_remove(struct of_device *dev) +{ + struct esp *esp = dev_get_drvdata(&dev->dev); + unsigned int irq = esp->host->irq; + u32 val; + + scsi_esp_unregister(esp); + + /* Disable interrupts. */ + val = dma_read32(DMA_CSR); + dma_write32(val & ~DMA_INT_ENAB, DMA_CSR); + + free_irq(irq, esp); + sbus_free_consistent(esp->dev, 16, + esp->command_block, + esp->command_block_dma); + sbus_iounmap(esp->regs, SBUS_ESP_REG_SIZE); + + scsi_host_put(esp->host); + + return 0; +} + +static struct of_device_id esp_match[] = { + { + .name = "SUNW,esp", + }, + { + .name = "SUNW,fas", + }, + { + .name = "esp", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, esp_match); + +static struct of_platform_driver esp_sbus_driver = { + .name = "esp", + .match_table = esp_match, + .probe = esp_sbus_probe, + .remove = __devexit_p(esp_sbus_remove), +}; + +static int __init sunesp_init(void) +{ + return of_register_driver(&esp_sbus_driver, &sbus_bus_type); +} + +static void __exit sunesp_exit(void) +{ + of_unregister_driver(&esp_sbus_driver); +} + +MODULE_DESCRIPTION("Sun ESP SCSI driver"); +MODULE_AUTHOR("David S. 
Miller (davem@davemloft.net)"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +module_init(sunesp_init); +module_exit(sunesp_exit); diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c index 96a852aa190..bfd44177a21 100644 --- a/drivers/serial/sunsu.c +++ b/drivers/serial/sunsu.c @@ -1387,8 +1387,8 @@ static enum su_type __devinit su_get_type(struct device_node *dp) struct device_node *ap = of_find_node_by_path("/aliases"); if (ap) { - char *keyb = of_get_property(ap, "keyboard", NULL); - char *ms = of_get_property(ap, "mouse", NULL); + const char *keyb = of_get_property(ap, "keyboard", NULL); + const char *ms = of_get_property(ap, "mouse", NULL); if (keyb) { if (dp == of_find_node_by_path(keyb)) diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index ec63b0ee074..d3e2c5f90a2 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -343,7 +343,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end); } - memcpy(sarb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); + memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD); __skb_put(sarb, ATM_CELL_PAYLOAD); if (pti & 1) { @@ -370,7 +370,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char goto out; } - if (crc32_be(~0, sarb->tail - pdu_length, pdu_length) != 0xc704dd7b) { + if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) { atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n", __func__, vcc); atomic_inc(&vcc->stats->rx_err); @@ -396,7 +396,9 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char goto out; /* atm_charge increments rx_drop */ } - memcpy(skb->data, sarb->tail - pdu_length, length); + skb_copy_to_linear_data(skb, + skb_tail_pointer(sarb) - pdu_length, + length); __skb_put(skb, length); vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u", @@ -484,7 +486,7 @@ static unsigned int usbatm_write_cells(struct usbatm_data *instance, ptr[4] = 0xec; ptr += ATM_CELL_HEADER; - memcpy(ptr, skb->data, data_len); + skb_copy_from_linear_data(skb, ptr, data_len); ptr += data_len; __skb_pull(skb, data_len); diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index 04e6b8508fb..8f9f217e0a6 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -1766,7 +1766,6 @@ static void rx_complete (struct usb_ep *ep, struct usb_request *req) break; } - skb->dev = dev->net; skb->protocol = eth_type_trans (skb, dev->net); dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c index 5808ea08245..d5ef97bc4d0 100644 --- a/drivers/usb/net/asix.c +++ b/drivers/usb/net/asix.c @@ -298,7 +298,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (ax_skb) { ax_skb->len = size; ax_skb->data = packet; - ax_skb->tail = packet + size; + skb_set_tail_pointer(ax_skb, size); usbnet_skb_return(dev, ax_skb); } else { return 0; @@ -338,7 +338,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, && ((headroom + tailroom) >= (4 + padlen))) { if ((headroom < 4) || (tailroom < padlen)) { skb->data = memmove(skb->head + 4, skb->data, skb->len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; @@ -352,11 +352,11 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_push(skb, 4); 
packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4); cpu_to_le32s(&packet_len); - memcpy(skb->data, &packet_len, sizeof(packet_len)); + skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len)); if ((skb->len % 512) == 0) { cpu_to_le32s(&padbytes); - memcpy( skb->tail, &padbytes, sizeof(padbytes)); + memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } return skb; diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c index 4852012735f..ffec2e01b89 100644 --- a/drivers/usb/net/catc.c +++ b/drivers/usb/net/catc.c @@ -255,7 +255,6 @@ static void catc_rx_done(struct urb *urb) if (!(skb = dev_alloc_skb(pkt_len))) return; - skb->dev = catc->netdev; eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0); skb_put(skb, pkt_len); @@ -419,7 +418,7 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6; tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr; *((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len); - memcpy(tx_buf + 2, skb->data, skb->len); + skb_copy_from_linear_data(skb, tx_buf + 2, skb->len); catc->tx_ptr += skb->len + 2; if (!test_and_set_bit(TX_RUNNING, &catc->flags)) diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c index d257a8e026d..031cf5ca4db 100644 --- a/drivers/usb/net/gl620a.c +++ b/drivers/usb/net/gl620a.c @@ -157,7 +157,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) if ((headroom < (4 + 4*1)) || (tailroom < padlen)) { skb->data = memmove(skb->head + (4 + 4*1), skb->data, skb->len); - skb->tail = skb->data + skb->len; + skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index de95268ae4b..a0cc05d21a6 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -636,8 +636,6 @@ static void kaweth_usb_receive(struct urb *urb) skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - skb->dev = net; - eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0); skb_put(skb, pkt_len); diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index ccebfdef475..19bf8dae70c 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -520,7 +520,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) skb->data = memmove(skb->head + sizeof (struct nc_header), skb->data, skb->len); - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); goto encapsulate; } } diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c index 6d12961cf9f..1ad4ee54b18 100644 --- a/drivers/usb/net/pegasus.c +++ b/drivers/usb/net/pegasus.c @@ -575,7 +575,6 @@ static void fill_skb_pool(pegasus_t * pegasus) */ if (pegasus->rx_pool[i] == NULL) return; - pegasus->rx_pool[i]->dev = pegasus->net; skb_reserve(pegasus->rx_pool[i], 2); } } @@ -890,7 +889,7 @@ static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net) netif_stop_queue(net); ((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16); - memcpy(pegasus->tx_buff + 2, skb->data, skb->len); + skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len); usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb, usb_sndbulkpipe(pegasus->usb, 2), pegasus->tx_buff, count, @@ -1415,8 +1414,10 @@ static void pegasus_disconnect(struct usb_interface *intf) unlink_all_urbs(pegasus); free_all_urbs(pegasus); free_skb_pool(pegasus); - if (pegasus->rx_skb) + if (pegasus->rx_skb != NULL) { 
dev_kfree_skb(pegasus->rx_skb); + pegasus->rx_skb = NULL; + } free_netdev(pegasus->net); } diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index 39a21c74fdf..1d36772ba6e 100644 --- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -588,7 +588,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) if (likely((sizeof *hdr) <= room)) { skb->data = memmove(skb->head + sizeof *hdr, skb->data, len); - skb->tail = skb->data + len; + skb_set_tail_pointer(skb, len); goto fill; } } diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c index ea153dc9b0a..fa598f0340c 100644 --- a/drivers/usb/net/rtl8150.c +++ b/drivers/usb/net/rtl8150.c @@ -646,7 +646,6 @@ static void fill_skb_pool(rtl8150_t *dev) if (!skb) { return; } - skb->dev = dev->netdev; skb_reserve(skb, 2); dev->rx_skb_pool[i] = skb; } diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index de69b183bd2..0c5465a7909 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -203,7 +203,6 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { int status; - skb->dev = dev->net; skb->protocol = eth_type_trans (skb, dev->net); dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index d7627fc4f11..8514f2a6f06 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -2899,7 +2899,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, struct fb_info *info, unsigned long addr) { struct atyfb_par *par = info->par; - struct pcidev_cookie *pcp; + struct device_node *dp; char prop[128]; int node, len, i, j, ret; u32 mem, chip_id; @@ -3037,8 +3037,8 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, node = 0; } - pcp = pdev->sysdata; - if (node == pcp->prom_node->node) { + dp = pci_device_to_OF_node(pdev); + if (node == dp->node) { struct fb_var_screeninfo *var = &default_var; unsigned int N, P, Q, M, T, R; u32 v_total, h_total; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 1bf6f42eb40..a4b3fd185de 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -410,7 +410,7 @@ static int __devinit radeon_find_mem_vbios(struct radeonfb_info *rinfo) } #endif -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) /* * Read XTAL (ref clock), SCLK and MCLK from Open Firmware device * tree. 
Hopefully, ATI OF driver is kind enough to fill these @@ -440,7 +440,7 @@ static int __devinit radeon_read_xtal_OF (struct radeonfb_info *rinfo) return 0; } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ /* * Read PLL infos from chip registers @@ -645,7 +645,7 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo) rinfo->pll.ref_div = INPLL(PPLL_REF_DIV) & PPLL_REF_DIV_MASK; -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) /* * Retrieve PLL infos from Open Firmware first */ @@ -653,7 +653,7 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo) printk(KERN_INFO "radeonfb: Retrieved PLL infos from Open Firmware\n"); goto found; } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ /* * Check out if we have an X86 which gave us some PLL informations @@ -2231,7 +2231,7 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev, rinfo->family == CHIP_FAMILY_RS200) rinfo->errata |= CHIP_ERRATA_PLL_DELAY; -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) /* On PPC, we obtain the OF device-node pointer to the firmware * data for this chip */ @@ -2240,6 +2240,8 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev, printk(KERN_WARNING "radeonfb (%s): Cannot match card to OF node !\n", pci_name(rinfo->pdev)); +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ +#ifdef CONFIG_PPC_OF /* On PPC, the firmware sets up a memory mapping that tends * to cause lockups when enabling the engine. We reconfigure * the card internal memory mappings properly diff --git a/drivers/video/aty/radeon_monitor.c b/drivers/video/aty/radeon_monitor.c index 38c7dbf8c15..737b5c09dbd 100644 --- a/drivers/video/aty/radeon_monitor.c +++ b/drivers/video/aty/radeon_monitor.c @@ -52,7 +52,7 @@ static char *radeon_get_mon_name(int type) } -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) /* * Try to find monitor informations & EDID data out of the Open Firmware * device-tree. 
This also contains some "hacks" to work around a few machine @@ -156,7 +156,7 @@ static int __devinit radeon_probe_OF_head(struct radeonfb_info *rinfo, int head_ } return MT_NONE; } -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ static int __devinit radeon_get_panel_info_BIOS(struct radeonfb_info *rinfo) @@ -495,11 +495,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo, * Old single head cards */ if (!rinfo->has_CRTC2) { -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0, &rinfo->mon1_EDID); -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = @@ -544,11 +544,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo, /* * Probe primary head (DVI or laptop internal panel) */ -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_OF_head(rinfo, 0, &rinfo->mon1_EDID); -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || CONFIG_SPARC */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon1_type == MT_NONE) rinfo->mon1_type = radeon_probe_i2c_connector(rinfo, ddc_dvi, @@ -572,11 +572,11 @@ void __devinit radeon_probe_screens(struct radeonfb_info *rinfo, /* * Probe secondary head (mostly VGA, can be DVI) */ -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) if (rinfo->mon2_type == MT_NONE) rinfo->mon2_type = radeon_probe_OF_head(rinfo, 1, &rinfo->mon2_EDID); -#endif /* CONFIG_PPC_OF */ +#endif /* CONFIG_PPC_OF || defined(CONFIG_SPARC) */ #ifdef CONFIG_FB_RADEON_I2C if (rinfo->mon2_type == MT_NONE) rinfo->mon2_type = radeon_probe_i2c_connector(rinfo, ddc_vga, diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h index d5ff224a625..31900036028 100644 --- a/drivers/video/aty/radeonfb.h +++ b/drivers/video/aty/radeonfb.h @@ -16,7 +16,7 @@ #include <asm/io.h> -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) #include <asm/prom.h> #endif @@ -292,7 +292,7 @@ struct radeonfb_info { unsigned long fb_local_base; struct pci_dev *pdev; -#ifdef CONFIG_PPC_OF +#if defined(CONFIG_PPC_OF) || defined(CONFIG_SPARC) struct device_node *of_node; #endif diff --git a/drivers/video/cg3.c b/drivers/video/cg3.c index 767c850f8eb..f042428a84f 100644 --- a/drivers/video/cg3.c +++ b/drivers/video/cg3.c @@ -266,7 +266,7 @@ static void __devinit cg3_init_fix(struct fb_info *info, int linebytes, static void __devinit cg3_rdi_maybe_fixup_var(struct fb_var_screeninfo *var, struct device_node *dp) { - char *params; + const char *params; char *p; int ww, hh; diff --git a/drivers/video/igafb.c b/drivers/video/igafb.c index 90592fb5915..eb1a4812ad1 100644 --- a/drivers/video/igafb.c +++ b/drivers/video/igafb.c @@ -44,8 +44,8 @@ #include <asm/io.h> -#ifdef __sparc__ -#include <asm/pbm.h> +#ifdef CONFIG_SPARC +#include <asm/prom.h> #include <asm/pcic.h> #endif @@ -96,7 +96,7 @@ struct fb_var_screeninfo default_var = { .vmode = FB_VMODE_NONINTERLACED }; -#ifdef __sparc__ +#ifdef CONFIG_SPARC struct fb_var_screeninfo default_var_1024x768 __initdata = { /* 1024x768, 75 Hz, Non-Interlaced (78.75 MHz dotclock) */ .xres = 1024, @@ -188,7 +188,7 @@ static inline void iga_outb(struct iga_par *par, unsigned char val, pci_outb(par, val, reg+1); } -#endif /* __sparc__ */ +#endif /* CONFIG_SPARC */ /* * Very important functionality for the 
JavaEngine1 computer: @@ -217,7 +217,7 @@ static void iga_blank_border(struct iga_par *par) iga_outb(par, 0, IGA_EXT_CNTRL, IGA_IDX_OVERSCAN_COLOR + i); } -#ifdef __sparc__ +#ifdef CONFIG_SPARC static int igafb_mmap(struct fb_info *info, struct vm_area_struct *vma) { @@ -271,7 +271,7 @@ static int igafb_mmap(struct fb_info *info, vma->vm_flags |= VM_IO; return 0; } -#endif /* __sparc__ */ +#endif /* CONFIG_SPARC */ static int igafb_setcolreg(unsigned regno, unsigned red, unsigned green, unsigned blue, unsigned transp, @@ -323,7 +323,7 @@ static struct fb_ops igafb_ops = { .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, -#ifdef __sparc__ +#ifdef CONFIG_SPARC .fb_mmap = igafb_mmap, #endif }; @@ -424,7 +424,7 @@ int __init igafb_init(void) par->frame_buffer_phys = addr & PCI_BASE_ADDRESS_MEM_MASK; -#ifdef __sparc__ +#ifdef CONFIG_SPARC /* * The following is sparc specific and this is why: * @@ -477,8 +477,8 @@ int __init igafb_init(void) * Set default vmode and cmode from PROM properties. */ { - struct pcidev_cookie *cookie = pdev->sysdata; - int node = cookie->prom_node; + struct device_node *dp = pci_device_to_OF_node(pdev); + int node = dp->node; int width = prom_getintdefault(node, "width", 1024); int height = prom_getintdefault(node, "height", 768); int depth = prom_getintdefault(node, "depth", 8); @@ -534,7 +534,7 @@ int __init igafb_init(void) kfree(info); } -#ifdef __sparc__ +#ifdef CONFIG_SPARC /* * Add /dev/fb mmap values. */ @@ -552,7 +552,7 @@ int __init igafb_init(void) par->mmap_map[1].size = PAGE_SIZE * 2; /* X wants 2 pages */ par->mmap_map[1].prot_mask = SRMMU_CACHE; par->mmap_map[1].prot_flag = SRMMU_WRITE; -#endif /* __sparc__ */ +#endif /* CONFIG_SPARC */ return 0; }
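The recurring pattern in the sk_buff hunks above (asix, gl620a, net1080, rndis_host, usbatm, catc, pegasus) is mechanical: direct arithmetic on skb->tail and raw memcpy() against skb->data are replaced by the skb_tail_pointer()/skb_set_tail_pointer() and skb_copy_{to,from}_linear_data() accessors, which keep a driver correct whether the kernel stores tail as a pointer or, on 64-bit builds with NET_SKBUFF_DATA_USES_OFFSET, as an offset from head. Below is a minimal userspace sketch of the offset-based variant of that idea; it is an illustration under stated assumptions, not the kernel's sk_buff, and the toy_buf type and toy_* helpers are hypothetical names.

#include <assert.h>
#include <stddef.h>
#include <string.h>

/* Toy analogue of an sk_buff whose tail is stored as an offset from
 * head (as with NET_SKBUFF_DATA_USES_OFFSET) instead of a raw pointer.
 */
struct toy_buf {
	unsigned char *head;	/* start of the allocated area */
	unsigned char *data;	/* start of the payload */
	size_t tail;		/* first free byte, as an offset from head */
};

/* Analogue of skb_tail_pointer(): convert the stored offset to a pointer. */
static unsigned char *toy_tail_pointer(const struct toy_buf *b)
{
	return b->head + b->tail;
}

/* Analogue of skb_set_tail_pointer(): tail lands "offset" bytes past data. */
static void toy_set_tail_pointer(struct toy_buf *b, size_t offset)
{
	b->tail = (size_t)(b->data - b->head) + offset;
}

/* Analogue of the memcpy-into-tail-room idiom seen in the hunks above:
 * copy into the free space, then advance the tail offset past it.
 */
static unsigned char *toy_append(struct toy_buf *b, const void *src, size_t len)
{
	unsigned char *p = toy_tail_pointer(b);

	memcpy(p, src, len);
	b->tail += len;
	return p;
}

int main(void)
{
	unsigned char storage[64];
	struct toy_buf b = { .head = storage, .data = storage + 2, .tail = 2 };

	toy_append(&b, "abcd", 4);
	toy_set_tail_pointer(&b, 4);	/* no-op here: tail already at data + 4 */
	assert(toy_tail_pointer(&b) == b.data + 4);
	return 0;
}

Because every driver goes through such helpers rather than touching skb->tail directly, the same source compiles unchanged for both representations, which is why the conversions above never reintroduce pointer arithmetic on the tail field.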