aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/base/iommu.c7
-rw-r--r--drivers/block/aoe/aoecmd.c23
-rw-r--r--drivers/block/hd.c2
-rw-r--r--drivers/block/xsysace.c30
-rw-r--r--drivers/char/hw_random/timeriomem-rng.c39
-rw-r--r--drivers/crypto/ixp4xx_crypto.c182
-rw-r--r--drivers/dma/Kconfig11
-rw-r--r--drivers/dma/dmaengine.c60
-rw-r--r--drivers/dma/dmatest.c307
-rw-r--r--drivers/dma/dw_dmac.c333
-rw-r--r--drivers/dma/dw_dmac_regs.h7
-rw-r--r--drivers/dma/fsldma.c1
-rw-r--r--drivers/dma/ioat_dma.c1
-rw-r--r--drivers/dma/iop-adma.c1
-rw-r--r--drivers/dma/ipu/ipu_idmac.c371
-rw-r--r--drivers/dma/ipu/ipu_irq.c2
-rw-r--r--drivers/dma/mv_xor.c1
-rw-r--r--drivers/md/Kconfig31
-rw-r--r--drivers/md/Makefile16
-rw-r--r--drivers/md/bitmap.c49
-rw-r--r--drivers/md/bitmap.h288
-rw-r--r--drivers/md/dm-bio-list.h10
-rw-r--r--drivers/md/dm-bio-record.h26
-rw-r--r--drivers/md/dm-crypt.c6
-rw-r--r--drivers/md/dm-exception-store.c252
-rw-r--r--drivers/md/dm-exception-store.h58
-rw-r--r--drivers/md/dm-io.c5
-rw-r--r--drivers/md/dm-log.c75
-rw-r--r--drivers/md/dm-path-selector.c21
-rw-r--r--drivers/md/dm-raid1.c50
-rw-r--r--drivers/md/dm-snap-persistent.c153
-rw-r--r--drivers/md/dm-snap-transient.c86
-rw-r--r--drivers/md/dm-snap.c384
-rw-r--r--drivers/md/dm-snap.h105
-rw-r--r--drivers/md/dm-table.c26
-rw-r--r--drivers/md/dm-target.c104
-rw-r--r--drivers/md/dm.c134
-rw-r--r--drivers/md/dm.h2
-rw-r--r--drivers/md/faulty.c19
-rw-r--r--drivers/md/linear.c25
-rw-r--r--drivers/md/linear.h29
-rw-r--r--drivers/md/md.c615
-rw-r--r--drivers/md/md.h436
-rw-r--r--drivers/md/mktables.c14
-rw-r--r--drivers/md/multipath.c17
-rw-r--r--drivers/md/multipath.h40
-rw-r--r--drivers/md/raid0.c66
-rw-r--r--drivers/md/raid0.h28
-rw-r--r--drivers/md/raid1.c35
-rw-r--r--drivers/md/raid1.h132
-rw-r--r--drivers/md/raid10.c42
-rw-r--r--drivers/md/raid10.h121
-rw-r--r--drivers/md/raid5.c1494
-rw-r--r--drivers/md/raid5.h474
-rw-r--r--drivers/md/raid6.h130
-rw-r--r--drivers/md/raid6algos.c21
-rw-r--r--drivers/md/raid6altivec.uc4
-rw-r--r--drivers/md/raid6int.uc4
-rw-r--r--drivers/md/raid6mmx.c4
-rw-r--r--drivers/md/raid6recov.c13
-rw-r--r--drivers/md/raid6sse1.c4
-rw-r--r--drivers/md/raid6sse2.c4
-rw-r--r--drivers/md/raid6test/Makefile2
-rw-r--r--drivers/md/raid6test/test.c2
-rw-r--r--drivers/md/raid6x86.h2
-rw-r--r--drivers/mfd/twl4030-core.c2
-rw-r--r--drivers/mmc/core/core.c100
-rw-r--r--drivers/mtd/maps/pxa2xx-flash.c2
-rw-r--r--drivers/parisc/asp.c2
-rw-r--r--drivers/parisc/ccio-dma.c16
-rw-r--r--drivers/parisc/dino.c7
-rw-r--r--drivers/parisc/eisa.c2
-rw-r--r--drivers/parisc/eisa_enumerator.c4
-rw-r--r--drivers/parisc/iosapic.c2
-rw-r--r--drivers/parisc/led.c26
-rw-r--r--drivers/pci/intel-iommu.c115
-rw-r--r--drivers/pcmcia/pxa2xx_cm_x255.c2
-rw-r--r--drivers/regulator/Kconfig13
-rw-r--r--drivers/regulator/Makefile1
-rw-r--r--drivers/regulator/bq24022.c3
-rw-r--r--drivers/regulator/core.c386
-rw-r--r--drivers/regulator/da903x.c3
-rw-r--r--drivers/regulator/fixed.c3
-rw-r--r--drivers/regulator/pcf50633-regulator.c3
-rw-r--r--drivers/regulator/twl4030-regulator.c500
-rw-r--r--drivers/regulator/virtual.c14
-rw-r--r--drivers/regulator/wm8350-regulator.c57
-rw-r--r--drivers/regulator/wm8400-regulator.c36
-rw-r--r--drivers/rtc/Kconfig31
-rw-r--r--drivers/rtc/Makefile4
-rw-r--r--drivers/rtc/rtc-generic.c84
-rw-r--r--drivers/rtc/rtc-parisc.c86
-rw-r--r--drivers/rtc/rtc-ppc.c69
-rw-r--r--drivers/rtc/rtc-ps3.c104
-rw-r--r--drivers/serial/mcf.c2
-rw-r--r--drivers/usb/storage/isd200.c239
96 files changed, 6646 insertions, 2308 deletions
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index c2d1eed9037..9f0e672f4be 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -98,3 +98,10 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
return iommu_ops->iova_to_phys(domain, iova);
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
+
+int iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ return iommu_ops->domain_has_cap(domain, cap);
+}
+EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 45c5a33daf4..31693bc2444 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -4,6 +4,7 @@
* Filesystem request handling methods
*/
+#include <linux/ata.h>
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
@@ -267,7 +268,7 @@ aoecmd_ata_rw(struct aoedev *d)
writebit = 0;
}
- ah->cmdstat = WIN_READ | writebit | extbit;
+ ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
/* mark all tracking fields and load out */
buf->nframesout += 1;
@@ -362,10 +363,10 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
switch (ah->cmdstat) {
default:
break;
- case WIN_READ:
- case WIN_READ_EXT:
- case WIN_WRITE:
- case WIN_WRITE_EXT:
+ case ATA_CMD_PIO_READ:
+ case ATA_CMD_PIO_READ_EXT:
+ case ATA_CMD_PIO_WRITE:
+ case ATA_CMD_PIO_WRITE_EXT:
put_lba(ah, f->lba);
n = f->bcnt;
@@ -812,8 +813,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
d->htgt = NULL;
n = ahout->scnt << 9;
switch (ahout->cmdstat) {
- case WIN_READ:
- case WIN_READ_EXT:
+ case ATA_CMD_PIO_READ:
+ case ATA_CMD_PIO_READ_EXT:
if (skb->len - sizeof *hin - sizeof *ahin < n) {
printk(KERN_ERR
"aoe: %s. skb->len=%d need=%ld\n",
@@ -823,8 +824,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
return;
}
memcpy(f->bufaddr, ahin+1, n);
- case WIN_WRITE:
- case WIN_WRITE_EXT:
+ case ATA_CMD_PIO_WRITE:
+ case ATA_CMD_PIO_WRITE_EXT:
ifp = getif(t, skb->dev);
if (ifp) {
ifp->lost = 0;
@@ -838,7 +839,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
goto xmit;
}
break;
- case WIN_IDENTIFY:
+ case ATA_CMD_ID_ATA:
if (skb->len - sizeof *hin - sizeof *ahin < 512) {
printk(KERN_INFO
"aoe: runt data size in ataid. skb->len=%d\n",
@@ -914,7 +915,7 @@ aoecmd_ata_id(struct aoedev *d)
/* set up ata header */
ah->scnt = 1;
- ah->cmdstat = WIN_IDENTIFY;
+ ah->cmdstat = ATA_CMD_ID_ATA;
ah->lba3 = 0xa0;
skb->dev = t->ifp->nd;
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 482c0c4b964..3c11f062a18 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -42,6 +42,8 @@
#include <linux/ata.h>
#include <linux/hdreg.h>
+#define HD_IRQ 14
+
#define REALLY_SLOW_IO
#include <asm/system.h>
#include <asm/io.h>
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 119be3442f2..6cccdc3f522 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -89,6 +89,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
+#include <linux/ata.h>
#include <linux/hdreg.h>
#include <linux/platform_device.h>
#if defined(CONFIG_OF)
@@ -208,7 +209,7 @@ struct ace_device {
struct gendisk *gd;
/* Inserted CF card parameters */
- struct hd_driveid cf_id;
+ u16 cf_id[ATA_ID_WORDS];
};
static int ace_major;
@@ -402,21 +403,14 @@ static void ace_dump_regs(struct ace_device *ace)
ace_in32(ace, ACE_CFGLBA), ace_in(ace, ACE_FATSTAT));
}
-void ace_fix_driveid(struct hd_driveid *id)
+void ace_fix_driveid(u16 *id)
{
#if defined(__BIG_ENDIAN)
- u16 *buf = (void *)id;
int i;
/* All half words have wrong byte order; swap the bytes */
- for (i = 0; i < sizeof(struct hd_driveid); i += 2, buf++)
- *buf = le16_to_cpu(*buf);
-
- /* Some of the data values are 32bit; swap the half words */
- id->lba_capacity = ((id->lba_capacity >> 16) & 0x0000FFFF) |
- ((id->lba_capacity << 16) & 0xFFFF0000);
- id->spg = ((id->spg >> 16) & 0x0000FFFF) |
- ((id->spg << 16) & 0xFFFF0000);
+ for (i = 0; i < ATA_ID_WORDS; i++, id++)
+ *id = le16_to_cpu(*id);
#endif
}
@@ -614,7 +608,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
break;
case ACE_FSM_STATE_IDENTIFY_COMPLETE:
- ace_fix_driveid(&ace->cf_id);
+ ace_fix_driveid(&ace->cf_id[0]);
ace_dump_mem(&ace->cf_id, 512); /* Debug: Dump out disk ID */
if (ace->data_result) {
@@ -627,9 +621,10 @@ static void ace_fsm_dostate(struct ace_device *ace)
ace->media_change = 0;
/* Record disk parameters */
- set_capacity(ace->gd, ace->cf_id.lba_capacity);
+ set_capacity(ace->gd,
+ ata_id_u32(&ace->cf_id, ATA_ID_LBA_CAPACITY));
dev_info(ace->dev, "capacity: %i sectors\n",
- ace->cf_id.lba_capacity);
+ ata_id_u32(&ace->cf_id, ATA_ID_LBA_CAPACITY));
}
/* We're done, drop to IDLE state and notify waiters */
@@ -928,12 +923,13 @@ static int ace_release(struct gendisk *disk, fmode_t mode)
static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct ace_device *ace = bdev->bd_disk->private_data;
+ u16 *cf_id = &ace->cf_id[0];
dev_dbg(ace->dev, "ace_getgeo()\n");
- geo->heads = ace->cf_id.heads;
- geo->sectors = ace->cf_id.sectors;
- geo->cylinders = ace->cf_id.cyls;
+ geo->heads = cf_id[ATA_ID_HEADS];
+ geo->sectors = cf_id[ATA_ID_SECTORS];
+ geo->cylinders = cf_id[ATA_ID_CYLS];
return 0;
}
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 10ad41be589..dcd352ad0e7 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -90,10 +90,30 @@ static struct hwrng timeriomem_rng_ops = {
static int __init timeriomem_rng_probe(struct platform_device *pdev)
{
+ struct resource *res, *mem;
int ret;
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+ if (!res)
+ return -ENOENT;
+
+ mem = request_mem_region(res->start, res->end - res->start + 1,
+ pdev->name);
+ if (mem == NULL)
+ return -EBUSY;
+
+ dev_set_drvdata(&pdev->dev, mem);
+
timeriomem_rng_data = pdev->dev.platform_data;
+ timeriomem_rng_data->address = ioremap(res->start,
+ res->end - res->start + 1);
+ if (!timeriomem_rng_data->address) {
+ ret = -ENOMEM;
+ goto err_ioremap;
+ }
+
if (timeriomem_rng_data->period != 0
&& usecs_to_jiffies(timeriomem_rng_data->period) > 0) {
timeriomem_rng_timer.expires = jiffies;
@@ -104,23 +124,34 @@ static int __init timeriomem_rng_probe(struct platform_device *pdev)
timeriomem_rng_data->present = 1;
ret = hwrng_register(&timeriomem_rng_ops);
- if (ret) {
- dev_err(&pdev->dev, "problem registering\n");
- return ret;
- }
+ if (ret)
+ goto err_register;
dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
timeriomem_rng_data->address,
timeriomem_rng_data->period);
return 0;
+
+err_register:
+ dev_err(&pdev->dev, "problem registering\n");
+ iounmap(timeriomem_rng_data->address);
+err_ioremap:
+ release_resource(mem);
+
+ return ret;
}
static int __devexit timeriomem_rng_remove(struct platform_device *pdev)
{
+ struct resource *mem = dev_get_drvdata(&pdev->dev);
+
del_timer_sync(&timeriomem_rng_timer);
hwrng_unregister(&timeriomem_rng_ops);
+ iounmap(timeriomem_rng_data->address);
+ release_resource(mem);
+
return 0;
}
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index d9e751be8c5..af9761ccf9f 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -101,6 +101,7 @@ struct buffer_desc {
u32 phys_addr;
u32 __reserved[4];
struct buffer_desc *next;
+ enum dma_data_direction dir;
};
struct crypt_ctl {
@@ -132,14 +133,10 @@ struct crypt_ctl {
struct ablk_ctx {
struct buffer_desc *src;
struct buffer_desc *dst;
- unsigned src_nents;
- unsigned dst_nents;
};
struct aead_ctx {
struct buffer_desc *buffer;
- unsigned short assoc_nents;
- unsigned short src_nents;
struct scatterlist ivlist;
/* used when the hmac is not on one sg entry */
u8 *hmac_virt;
@@ -312,7 +309,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
}
}
-static void free_buf_chain(struct buffer_desc *buf, u32 phys)
+static void free_buf_chain(struct device *dev, struct buffer_desc *buf,u32 phys)
{
while (buf) {
struct buffer_desc *buf1;
@@ -320,6 +317,7 @@ static void free_buf_chain(struct buffer_desc *buf, u32 phys)
buf1 = buf->next;
phys1 = buf->phys_next;
+ dma_unmap_single(dev, buf->phys_next, buf->buf_len, buf->dir);
dma_pool_free(buffer_pool, buf, phys);
buf = buf1;
phys = phys1;
@@ -348,7 +346,6 @@ static void one_packet(dma_addr_t phys)
struct crypt_ctl *crypt;
struct ixp_ctx *ctx;
int failed;
- enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
failed = phys & 0x1 ? -EBADMSG : 0;
phys &= ~0x3;
@@ -358,13 +355,8 @@ static void one_packet(dma_addr_t phys)
case CTL_FLAG_PERFORM_AEAD: {
struct aead_request *req = crypt->data.aead_req;
struct aead_ctx *req_ctx = aead_request_ctx(req);
- dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents,
- DMA_TO_DEVICE);
- dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
- dma_unmap_sg(dev, req->src, req_ctx->src_nents,
- DMA_BIDIRECTIONAL);
- free_buf_chain(req_ctx->buffer, crypt->src_buf);
+ free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
if (req_ctx->hmac_virt) {
finish_scattered_hmac(crypt);
}
@@ -374,16 +366,11 @@ static void one_packet(dma_addr_t phys)
case CTL_FLAG_PERFORM_ABLK: {
struct ablkcipher_request *req = crypt->data.ablk_req;
struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
- int nents;
+
if (req_ctx->dst) {
- nents = req_ctx->dst_nents;
- dma_unmap_sg(dev, req->dst, nents, DMA_FROM_DEVICE);
- free_buf_chain(req_ctx->dst, crypt->dst_buf);
- src_direction = DMA_TO_DEVICE;
+ free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
}
- nents = req_ctx->src_nents;
- dma_unmap_sg(dev, req->src, nents, src_direction);
- free_buf_chain(req_ctx->src, crypt->src_buf);
+ free_buf_chain(dev, req_ctx->src, crypt->src_buf);
req->base.complete(&req->base, failed);
break;
}
@@ -750,56 +737,35 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
return 0;
}
-static int count_sg(struct scatterlist *sg, int nbytes)
+static struct buffer_desc *chainup_buffers(struct device *dev,
+ struct scatterlist *sg, unsigned nbytes,
+ struct buffer_desc *buf, gfp_t flags,
+ enum dma_data_direction dir)
{
- int i;
- for (i = 0; nbytes > 0; i++, sg = sg_next(sg))
- nbytes -= sg->length;
- return i;
-}
-
-static struct buffer_desc *chainup_buffers(struct scatterlist *sg,
- unsigned nbytes, struct buffer_desc *buf, gfp_t flags)
-{
- int nents = 0;
-
- while (nbytes > 0) {
+ for (;nbytes > 0; sg = scatterwalk_sg_next(sg)) {
+ unsigned len = min(nbytes, sg->length);
struct buffer_desc *next_buf;
u32 next_buf_phys;
- unsigned len = min(nbytes, sg_dma_len(sg));
+ void *ptr;
- nents++;
nbytes -= len;
- if (!buf->phys_addr) {
- buf->phys_addr = sg_dma_address(sg);
- buf->buf_len = len;
- buf->next = NULL;
- buf->phys_next = 0;
- goto next;
- }
- /* Two consecutive chunks on one page may be handled by the old
- * buffer descriptor, increased by the length of the new one
- */
- if (sg_dma_address(sg) == buf->phys_addr + buf->buf_len) {
- buf->buf_len += len;
- goto next;
- }
+ ptr = page_address(sg_page(sg)) + sg->offset;
next_buf = dma_pool_alloc(buffer_pool, flags, &next_buf_phys);
- if (!next_buf)
- return NULL;
+ if (!next_buf) {
+ buf = NULL;
+ break;
+ }
+ sg_dma_address(sg) = dma_map_single(dev, ptr, len, dir);
buf->next = next_buf;
buf->phys_next = next_buf_phys;
-
buf = next_buf;
- buf->next = NULL;
- buf->phys_next = 0;
+
buf->phys_addr = sg_dma_address(sg);
buf->buf_len = len;
-next:
- if (nbytes > 0) {
- sg = sg_next(sg);
- }
+ buf->dir = dir;
}
+ buf->next = NULL;
+ buf->phys_next = 0;
return buf;
}
@@ -860,12 +826,12 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct ixp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
unsigned ivsize = crypto_ablkcipher_ivsize(tfm);
- int ret = -ENOMEM;
struct ix_sa_dir *dir;
struct crypt_ctl *crypt;
- unsigned int nbytes = req->nbytes, nents;
+ unsigned int nbytes = req->nbytes;
enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
struct ablk_ctx *req_ctx = ablkcipher_request_ctx(req);
+ struct buffer_desc src_hook;
gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
GFP_KERNEL : GFP_ATOMIC;
@@ -878,7 +844,7 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
crypt = get_crypt_desc();
if (!crypt)
- return ret;
+ return -ENOMEM;
crypt->data.ablk_req = req;
crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -891,53 +857,41 @@ static int ablk_perform(struct ablkcipher_request *req, int encrypt)
BUG_ON(ivsize && !req->info);
memcpy(crypt->iv, req->info, ivsize);
if (req->src != req->dst) {
+ struct buffer_desc dst_hook;
crypt->mode |= NPE_OP_NOT_IN_PLACE;
- nents = count_sg(req->dst, nbytes);
/* This was never tested by Intel
* for more than one dst buffer, I think. */
- BUG_ON(nents != 1);
- req_ctx->dst_nents = nents;
- dma_map_sg(dev, req->dst, nents, DMA_FROM_DEVICE);
- req_ctx->dst = dma_pool_alloc(buffer_pool, flags,&crypt->dst_buf);
- if (!req_ctx->dst)
- goto unmap_sg_dest;
- req_ctx->dst->phys_addr = 0;
- if (!chainup_buffers(req->dst, nbytes, req_ctx->dst, flags))
+ BUG_ON(req->dst->length < nbytes);
+ req_ctx->dst = NULL;
+ if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook,
+ flags, DMA_FROM_DEVICE))
goto free_buf_dest;
src_direction = DMA_TO_DEVICE;
+ req_ctx->dst = dst_hook.next;
+ crypt->dst_buf = dst_hook.phys_next;
} else {
req_ctx->dst = NULL;
- req_ctx->dst_nents = 0;
}
- nents = count_sg(req->src, nbytes);
- req_ctx->src_nents = nents;
- dma_map_sg(dev, req->src, nents, src_direction);
-
- req_ctx->src = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
- if (!req_ctx->src)
- goto unmap_sg_src;
- req_ctx->src->phys_addr = 0;
- if (!chainup_buffers(req->src, nbytes, req_ctx->src, flags))
+ req_ctx->src = NULL;
+ if (!chainup_buffers(dev, req->src, nbytes, &src_hook,
+ flags, src_direction))
goto free_buf_src;
+ req_ctx->src = src_hook.next;
+ crypt->src_buf = src_hook.phys_next;
crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK;
qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
BUG_ON(qmgr_stat_overflow(SEND_QID));
return -EINPROGRESS;
free_buf_src:
- free_buf_chain(req_ctx->src, crypt->src_buf);
-unmap_sg_src:
- dma_unmap_sg(dev, req->src, req_ctx->src_nents, src_direction);
+ free_buf_chain(dev, req_ctx->src, crypt->src_buf);
free_buf_dest:
if (req->src != req->dst) {
- free_buf_chain(req_ctx->dst, crypt->dst_buf);
-unmap_sg_dest:
- dma_unmap_sg(dev, req->src, req_ctx->dst_nents,
- DMA_FROM_DEVICE);
+ free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
}
crypt->ctl_flags = CTL_FLAG_UNUSED;
- return ret;
+ return -ENOMEM;
}
static int ablk_encrypt(struct ablkcipher_request *req)
@@ -985,7 +939,7 @@ static int hmac_inconsistent(struct scatterlist *sg, unsigned start,
break;
offset += sg->length;
- sg = sg_next(sg);
+ sg = scatterwalk_sg_next(sg);
}
return (start + nbytes > offset + sg->length);
}
@@ -997,11 +951,10 @@ static int aead_perform(struct aead_request *req, int encrypt,
struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
unsigned ivsize = crypto_aead_ivsize(tfm);
unsigned authsize = crypto_aead_authsize(tfm);
- int ret = -ENOMEM;
struct ix_sa_dir *dir;
struct crypt_ctl *crypt;
- unsigned int cryptlen, nents;
- struct buffer_desc *buf;
+ unsigned int cryptlen;
+ struct buffer_desc *buf, src_hook;
struct aead_ctx *req_ctx = aead_request_ctx(req);
gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
GFP_KERNEL : GFP_ATOMIC;
@@ -1022,7 +975,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
}
crypt = get_crypt_desc();
if (!crypt)
- return ret;
+ return -ENOMEM;
crypt->data.aead_req = req;
crypt->crypto_ctx = dir->npe_ctx_phys;
@@ -1041,31 +994,27 @@ static int aead_perform(struct aead_request *req, int encrypt,
BUG(); /* -ENOTSUP because of my lazyness */
}
- req_ctx->buffer = dma_pool_alloc(buffer_pool, flags, &crypt->src_buf);
- if (!req_ctx->buffer)
- goto out;
- req_ctx->buffer->phys_addr = 0;
/* ASSOC data */
- nents = count_sg(req->assoc, req->assoclen);
- req_ctx->assoc_nents = nents;
- dma_map_sg(dev, req->assoc, nents, DMA_TO_DEVICE);
- buf = chainup_buffers(req->assoc, req->assoclen, req_ctx->buffer,flags);
+ buf = chainup_buffers(dev, req->assoc, req->assoclen, &src_hook,
+ flags, DMA_TO_DEVICE);
+ req_ctx->buffer = src_hook.next;
+ crypt->src_buf = src_hook.phys_next;
if (!buf)
- goto unmap_sg_assoc;
+ goto out;
/* IV */
sg_init_table(&req_ctx->ivlist, 1);
sg_set_buf(&req_ctx->ivlist, iv, ivsize);
- dma_map_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
- buf = chainup_buffers(&req_ctx->ivlist, ivsize, buf, flags);
+ buf = chainup_buffers(dev, &req_ctx->ivlist, ivsize, buf, flags,
+ DMA_BIDIRECTIONAL);
if (!buf)
- goto unmap_sg_iv;
+ goto free_chain;
if (unlikely(hmac_inconsistent(req->src, cryptlen, authsize))) {
/* The 12 hmac bytes are scattered,
* we need to copy them into a safe buffer */
req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
&crypt->icv_rev_aes);
if (unlikely(!req_ctx->hmac_virt))
- goto unmap_sg_iv;
+ goto free_chain;
if (!encrypt) {
scatterwalk_map_and_copy(req_ctx->hmac_virt,
req->src, cryptlen, authsize, 0);
@@ -1075,33 +1024,28 @@ static int aead_perform(struct aead_request *req, int encrypt,
req_ctx->hmac_virt = NULL;
}
/* Crypt */
- nents = count_sg(req->src, cryptlen + authsize);
- req_ctx->src_nents = nents;
- dma_map_sg(dev, req->src, nents, DMA_BIDIRECTIONAL);
- buf = chainup_buffers(req->src, cryptlen + authsize, buf, flags);
+ buf = chainup_buffers(dev, req->src, cryptlen + authsize, buf, flags,
+ DMA_BIDIRECTIONAL);
if (!buf)
- goto unmap_sg_src;
+ goto free_hmac_virt;
if (!req_ctx->hmac_virt) {
crypt->icv_rev_aes = buf->phys_addr + buf->buf_len - authsize;
}
+
crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD;
qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
BUG_ON(qmgr_stat_overflow(SEND_QID));
return -EINPROGRESS;
-unmap_sg_src:
- dma_unmap_sg(dev, req->src, req_ctx->src_nents, DMA_BIDIRECTIONAL);
+free_hmac_virt:
if (req_ctx->hmac_virt) {
dma_pool_free(buffer_pool, req_ctx->hmac_virt,
crypt->icv_rev_aes);
}
-unmap_sg_iv:
- dma_unmap_sg(dev, &req_ctx->ivlist, 1, DMA_BIDIRECTIONAL);
-unmap_sg_assoc:
- dma_unmap_sg(dev, req->assoc, req_ctx->assoc_nents, DMA_TO_DEVICE);
- free_buf_chain(req_ctx->buffer, crypt->src_buf);
+free_chain:
+ free_buf_chain(dev, req_ctx->buffer, crypt->src_buf);
out:
crypt->ctl_flags = CTL_FLAG_UNUSED;
- return ret;
+ return -ENOMEM;
}
static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 48ea59e7967..3b3c01b6f1e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -98,6 +98,17 @@ config NET_DMA
Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
say N.
+config ASYNC_TX_DMA
+ bool "Async_tx: Offload support for the async_tx api"
+ depends on DMA_ENGINE
+ help
+ This allows the async_tx api to take advantage of offload engines for
+ memcpy, memset, xor, and raid6 p+q operations. If your platform has
+ a dma engine that can perform raid operations and you have enabled
+ MD_RAID456 say Y.
+
+ If unsure, say N.
+
config DMATEST
tristate "DMA Test client"
depends on DMA_ENGINE
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 280a9d263eb..92438e9dacc 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -507,6 +507,7 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
* published in the general-purpose allocator
*/
dma_cap_set(DMA_PRIVATE, device->cap_mask);
+ device->privatecnt++;
err = dma_chan_get(chan);
if (err == -ENODEV) {
@@ -518,6 +519,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
dma_chan_name(chan), err);
else
break;
+ if (--device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, device->cap_mask);
chan->private = NULL;
chan = NULL;
}
@@ -537,6 +540,9 @@ void dma_release_channel(struct dma_chan *chan)
WARN_ONCE(chan->client_count != 1,
"chan reference count %d != 1\n", chan->client_count);
dma_chan_put(chan);
+ /* drop PRIVATE cap enabled by __dma_request_channel() */
+ if (--chan->device->privatecnt == 0)
+ dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
chan->private = NULL;
mutex_unlock(&dma_list_mutex);
}
@@ -602,6 +608,24 @@ void dmaengine_put(void)
}
EXPORT_SYMBOL(dmaengine_put);
+static int get_dma_id(struct dma_device *device)
+{
+ int rc;
+
+ idr_retry:
+ if (!idr_pre_get(&dma_idr, GFP_KERNEL))
+ return -ENOMEM;
+ mutex_lock(&dma_list_mutex);
+ rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
+ mutex_unlock(&dma_list_mutex);
+ if (rc == -EAGAIN)
+ goto idr_retry;
+ else if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
/**
* dma_async_device_register - registers DMA devices found
* @device: &dma_device
@@ -640,27 +664,25 @@ int dma_async_device_register(struct dma_device *device)
idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
if (!idr_ref)
return -ENOMEM;
- atomic_set(idr_ref, 0);
- idr_retry:
- if (!idr_pre_get(&dma_idr, GFP_KERNEL))
- return -ENOMEM;
- mutex_lock(&dma_list_mutex);
- rc = idr_get_new(&dma_idr, NULL, &device->dev_id);
- mutex_unlock(&dma_list_mutex);
- if (rc == -EAGAIN)
- goto idr_retry;
- else if (rc != 0)
+ rc = get_dma_id(device);
+ if (rc != 0) {
+ kfree(idr_ref);
return rc;
+ }
+
+ atomic_set(idr_ref, 0);
/* represent channels in sysfs. Probably want devs too */
list_for_each_entry(chan, &device->channels, device_node) {
+ rc = -ENOMEM;
chan->local = alloc_percpu(typeof(*chan->local));
if (chan->local == NULL)
- continue;
+ goto err_out;
chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
if (chan->dev == NULL) {
free_percpu(chan->local);
- continue;
+ chan->local = NULL;
+ goto err_out;
}
chan->chan_id = chancnt++;
@@ -677,6 +699,8 @@ int dma_async_device_register(struct dma_device *device)
if (rc) {
free_percpu(chan->local);
chan->local = NULL;
+ kfree(chan->dev);
+ atomic_dec(idr_ref);
goto err_out;
}
chan->client_count = 0;
@@ -701,12 +725,23 @@ int dma_async_device_register(struct dma_device *device)
}
}
list_add_tail_rcu(&device->global_node, &dma_device_list);
+ if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+ device->privatecnt++; /* Always private */
dma_channel_rebalance();
mutex_unlock(&dma_list_mutex);
return 0;
err_out:
+ /* if we never registered a channel just release the idr */
+ if (atomic_read(idr_ref) == 0) {
+ mutex_lock(&dma_list_mutex);
+ idr_remove(&dma_idr, device->dev_id);
+ mutex_unlock(&dma_list_mutex);
+ kfree(idr_ref);
+ return rc;
+ }
+
list_for_each_entry(chan, &device->channels, device_node) {
if (chan->local == NULL)
continue;
@@ -893,6 +928,7 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
{
tx->chan = chan;
spin_lock_init(&tx->lock);
+ INIT_LIST_HEAD(&tx->tx_list);
}
EXPORT_SYMBOL(dma_async_tx_descriptor_init);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index e190d8b3070..a27c0fb1bc1 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -38,6 +38,11 @@ module_param(max_channels, uint, S_IRUGO);
MODULE_PARM_DESC(max_channels,
"Maximum number of channels to use (default: all)");
+static unsigned int xor_sources = 3;
+module_param(xor_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(xor_sources,
+ "Number of xor source buffers (default: 3)");
+
/*
* Initialization patterns. All bytes in the source buffer has bit 7
* set, all bytes in the destination buffer has bit 7 cleared.
@@ -59,8 +64,9 @@ struct dmatest_thread {
struct list_head node;
struct task_struct *task;
struct dma_chan *chan;
- u8 *srcbuf;
- u8 *dstbuf;
+ u8 **srcs;
+ u8 **dsts;
+ enum dma_transaction_type type;
};
struct dmatest_chan {
@@ -98,30 +104,37 @@ static unsigned long dmatest_random(void)
return buf;
}
-static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len)
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len)
{
unsigned int i;
-
- for (i = 0; i < start; i++)
- buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
- for ( ; i < start + len; i++)
- buf[i] = PATTERN_SRC | PATTERN_COPY
- | (~i & PATTERN_COUNT_MASK);;
- for ( ; i < test_buf_size; i++)
- buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_SRC | PATTERN_COPY
+ | (~i & PATTERN_COUNT_MASK);;
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ buf++;
+ }
}
-static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len)
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len)
{
unsigned int i;
-
- for (i = 0; i < start; i++)
- buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
- for ( ; i < start + len; i++)
- buf[i] = PATTERN_DST | PATTERN_OVERWRITE
- | (~i & PATTERN_COUNT_MASK);
- for ( ; i < test_buf_size; i++)
- buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ u8 *buf;
+
+ for (; (buf = *bufs); bufs++) {
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+ | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ }
}
static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
@@ -150,23 +163,30 @@ static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
thread_name, index, expected, actual);
}
-static unsigned int dmatest_verify(u8 *buf, unsigned int start,
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
unsigned int end, unsigned int counter, u8 pattern,
bool is_srcbuf)
{
unsigned int i;
unsigned int error_count = 0;
u8 actual;
-
- for (i = start; i < end; i++) {
- actual = buf[i];
- if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) {
- if (error_count < 32)
- dmatest_mismatch(actual, pattern, i, counter,
- is_srcbuf);
- error_count++;
+ u8 expected;
+ u8 *buf;
+ unsigned int counter_orig = counter;
+
+ for (; (buf = *bufs); bufs++) {
+ counter = counter_orig;
+ for (i = start; i < end; i++) {
+ actual = buf[i];
+ expected = pattern | (~counter & PATTERN_COUNT_MASK);
+ if (actual != expected) {
+ if (error_count < 32)
+ dmatest_mismatch(actual, pattern, i,
+ counter, is_srcbuf);
+ error_count++;
+ }
+ counter++;
}
- counter++;
}
if (error_count > 32)
@@ -176,12 +196,17 @@ static unsigned int dmatest_verify(u8 *buf, unsigned int start,
return error_count;
}
+static void dmatest_callback(void *completion)
+{
+ complete(completion);
+}
+
/*
* This function repeatedly tests DMA transfers of various lengths and
- * offsets until it is told to exit by kthread_stop(). There may be
- * multiple threads running this function in parallel for a single
- * channel, and there may be multiple channels being tested in
- * parallel.
+ * offsets for a given operation type until it is told to exit by
+ * kthread_stop(). There may be multiple threads running this function
+ * in parallel for a single channel, and there may be multiple channels
+ * being tested in parallel.
*
* Before each test, the source and destination buffer is initialized
* with a known pattern. This pattern is different depending on
@@ -201,25 +226,57 @@ static int dmatest_func(void *data)
unsigned int total_tests = 0;
dma_cookie_t cookie;
enum dma_status status;
+ enum dma_ctrl_flags flags;
int ret;
+ int src_cnt;
+ int dst_cnt;
+ int i;
thread_name = current->comm;
ret = -ENOMEM;
- thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL);
- if (!thread->srcbuf)
- goto err_srcbuf;
- thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL);
- if (!thread->dstbuf)
- goto err_dstbuf;
smp_rmb();
chan = thread->chan;
+ if (thread->type == DMA_MEMCPY)
+ src_cnt = dst_cnt = 1;
+ else if (thread->type == DMA_XOR) {
+ src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
+ dst_cnt = 1;
+ } else
+ goto err_srcs;
+
+ thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
+ if (!thread->srcs)
+ goto err_srcs;
+ for (i = 0; i < src_cnt; i++) {
+ thread->srcs[i] = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->srcs[i])
+ goto err_srcbuf;
+ }
+ thread->srcs[i] = NULL;
+
+ thread->dsts = kcalloc(dst_cnt+1, sizeof(u8 *), GFP_KERNEL);
+ if (!thread->dsts)
+ goto err_dsts;
+ for (i = 0; i < dst_cnt; i++) {
+ thread->dsts[i] = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->dsts[i])
+ goto err_dstbuf;
+ }
+ thread->dsts[i] = NULL;
+
+ set_user_nice(current, 10);
+
+ flags = DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP | DMA_PREP_INTERRUPT;
while (!kthread_should_stop()) {
struct dma_device *dev = chan->device;
- struct dma_async_tx_descriptor *tx;
- dma_addr_t dma_src, dma_dest;
+ struct dma_async_tx_descriptor *tx = NULL;
+ dma_addr_t dma_srcs[src_cnt];
+ dma_addr_t dma_dsts[dst_cnt];
+ struct completion cmp;
+ unsigned long tmo = msecs_to_jiffies(3000);
total_tests++;
@@ -227,22 +284,41 @@ static int dmatest_func(void *data)
src_off = dmatest_random() % (test_buf_size - len + 1);
dst_off = dmatest_random() % (test_buf_size - len + 1);
- dmatest_init_srcbuf(thread->srcbuf, src_off, len);
- dmatest_init_dstbuf(thread->dstbuf, dst_off, len);
+ dmatest_init_srcs(thread->srcs, src_off, len);
+ dmatest_init_dsts(thread->dsts, dst_off, len);
- dma_src = dma_map_single(dev->dev, thread->srcbuf + src_off,
- len, DMA_TO_DEVICE);
+ for (i = 0; i < src_cnt; i++) {
+ u8 *buf = thread->srcs[i] + src_off;
+
+ dma_srcs[i] = dma_map_single(dev->dev, buf, len,
+ DMA_TO_DEVICE);
+ }
/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
- dma_dest = dma_map_single(dev->dev, thread->dstbuf,
- test_buf_size, DMA_BIDIRECTIONAL);
+ for (i = 0; i < dst_cnt; i++) {
+ dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
+ test_buf_size,
+ DMA_BIDIRECTIONAL);
+ }
+
+ if (thread->type == DMA_MEMCPY)
+ tx = dev->device_prep_dma_memcpy(chan,
+ dma_dsts[0] + dst_off,
+ dma_srcs[0], len,
+ flags);
+ else if (thread->type == DMA_XOR)
+ tx = dev->device_prep_dma_xor(chan,
+ dma_dsts[0] + dst_off,
+ dma_srcs, xor_sources,
+ len, flags);
- tx = dev->device_prep_dma_memcpy(chan, dma_dest + dst_off,
- dma_src, len,
- DMA_CTRL_ACK | DMA_COMPL_SKIP_DEST_UNMAP);
if (!tx) {
- dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
- dma_unmap_single(dev->dev, dma_dest,
- test_buf_size, DMA_BIDIRECTIONAL);
+ for (i = 0; i < src_cnt; i++)
+ dma_unmap_single(dev->dev, dma_srcs[i], len,
+ DMA_TO_DEVICE);
+ for (i = 0; i < dst_cnt; i++)
+ dma_unmap_single(dev->dev, dma_dsts[i],
+ test_buf_size,
+ DMA_BIDIRECTIONAL);
pr_warning("%s: #%u: prep error with src_off=0x%x "
"dst_off=0x%x len=0x%x\n",
thread_name, total_tests - 1,
@@ -251,7 +327,10 @@ static int dmatest_func(void *data)
failed_tests++;
continue;
}
- tx->callback = NULL;
+
+ init_completion(&cmp);
+ tx->callback = dmatest_callback;
+ tx->callback_param = &cmp;
cookie = tx->tx_submit(tx);
if (dma_submit_error(cookie)) {
@@ -263,44 +342,50 @@ static int dmatest_func(void *data)
failed_tests++;
continue;
}
- dma_async_memcpy_issue_pending(chan);
+ dma_async_issue_pending(chan);
- do {
- msleep(1);
- status = dma_async_memcpy_complete(
- chan, cookie, NULL, NULL);
- } while (status == DMA_IN_PROGRESS);
+ tmo = wait_for_completion_timeout(&cmp, tmo);
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
- if (status == DMA_ERROR) {
- pr_warning("%s: #%u: error during copy\n",
- thread_name, total_tests - 1);
+ if (tmo == 0) {
+ pr_warning("%s: #%u: test timed out\n",
+ thread_name, total_tests - 1);
+ failed_tests++;
+ continue;
+ } else if (status != DMA_SUCCESS) {
+ pr_warning("%s: #%u: got completion callback,"
+ " but status is \'%s\'\n",
+ thread_name, total_tests - 1,
+ status == DMA_ERROR ? "error" : "in progress");
failed_tests++;
continue;
}
+
/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
- dma_unmap_single(dev->dev, dma_dest,
- test_buf_size, DMA_BIDIRECTIONAL);
+ for (i = 0; i < dst_cnt; i++)
+ dma_unmap_single(dev->dev, dma_dsts[i], test_buf_size,
+ DMA_BIDIRECTIONAL);
error_count = 0;
pr_debug("%s: verifying source buffer...\n", thread_name);
- error_count += dmatest_verify(thread->srcbuf, 0, src_off,
+ error_count += dmatest_verify(thread->srcs, 0, src_off,
0, PATTERN_SRC, true);
- error_count += dmatest_verify(thread->srcbuf, src_off,
+ error_count += dmatest_verify(thread->srcs, src_off,
src_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, true);
- error_count += dmatest_verify(thread->srcbuf, src_off + len,
+ error_count += dmatest_verify(thread->srcs, src_off + len,
test_buf_size, src_off + len,
PATTERN_SRC, true);
pr_debug("%s: verifying dest buffer...\n",
thread->task->comm);
- error_count += dmatest_verify(thread->dstbuf, 0, dst_off,
+ error_count += dmatest_verify(thread->dsts, 0, dst_off,
0, PATTERN_DST, false);
- error_count += dmatest_verify(thread->dstbuf, dst_off,
+ error_count += dmatest_verify(thread->dsts, dst_off,
dst_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, false);
- error_count += dmatest_verify(thread->dstbuf, dst_off + len,
+ error_count += dmatest_verify(thread->dsts, dst_off + len,
test_buf_size, dst_off + len,
PATTERN_DST, false);
@@ -319,10 +404,16 @@ static int dmatest_func(void *data)
}
ret = 0;
- kfree(thread->dstbuf);
+ for (i = 0; thread->dsts[i]; i++)
+ kfree(thread->dsts[i]);
err_dstbuf:
- kfree(thread->srcbuf);
+ kfree(thread->dsts);
+err_dsts:
+ for (i = 0; thread->srcs[i]; i++)
+ kfree(thread->srcs[i]);
err_srcbuf:
+ kfree(thread->srcs);
+err_srcs:
pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
thread_name, total_tests, failed_tests, ret);
return ret;
@@ -344,35 +435,36 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
kfree(dtc);
}
-static int dmatest_add_channel(struct dma_chan *chan)
+static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_type type)
{
- struct dmatest_chan *dtc;
- struct dmatest_thread *thread;
- unsigned int i;
-
- dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
- if (!dtc) {
- pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
- return -ENOMEM;
- }
+ struct dmatest_thread *thread;
+ struct dma_chan *chan = dtc->chan;
+ char *op;
+ unsigned int i;
- dtc->chan = chan;
- INIT_LIST_HEAD(&dtc->threads);
+ if (type == DMA_MEMCPY)
+ op = "copy";
+ else if (type == DMA_XOR)
+ op = "xor";
+ else
+ return -EINVAL;
for (i = 0; i < threads_per_chan; i++) {
thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
if (!thread) {
- pr_warning("dmatest: No memory for %s-test%u\n",
- dma_chan_name(chan), i);
+ pr_warning("dmatest: No memory for %s-%s%u\n",
+ dma_chan_name(chan), op, i);
+
break;
}
thread->chan = dtc->chan;
+ thread->type = type;
smp_wmb();
- thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
- dma_chan_name(chan), i);
+ thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+ dma_chan_name(chan), op, i);
if (IS_ERR(thread->task)) {
- pr_warning("dmatest: Failed to run thread %s-test%u\n",
- dma_chan_name(chan), i);
+ pr_warning("dmatest: Failed to run thread %s-%s%u\n",
+ dma_chan_name(chan), op, i);
kfree(thread);
break;
}
@@ -382,7 +474,36 @@ static int dmatest_add_channel(struct dma_chan *chan)
list_add_tail(&thread->node, &dtc->threads);
}
- pr_info("dmatest: Started %u threads using %s\n", i, dma_chan_name(chan));
+ return i;
+}
+
+static int dmatest_add_channel(struct dma_chan *chan)
+{
+ struct dmatest_chan *dtc;
+ struct dma_device *dma_dev = chan->device;
+ unsigned int thread_count = 0;
+ unsigned int cnt;
+
+ dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+ if (!dtc) {
+ pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+ return -ENOMEM;
+ }
+
+ dtc->chan = chan;
+ INIT_LIST_HEAD(&dtc->threads);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_MEMCPY);
+ thread_count += cnt > 0 ?: 0;
+ }
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ cnt = dmatest_add_threads(dtc, DMA_XOR);
+ thread_count += cnt > 0 ?: 0;
+ }
+
+ pr_info("dmatest: Started %u threads using %s\n",
+ thread_count, dma_chan_name(chan));
list_add_tail(&dtc->node, &dmatest_channels);
nr_channels++;
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index 20ad3d26bec..98c9a847bf5 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -363,6 +363,82 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
dwc_descriptor_complete(dwc, bad_desc);
}
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+inline dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ return channel_readl(dwc, SAR);
+}
+EXPORT_SYMBOL(dw_dma_get_src_addr);
+
+inline dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ return channel_readl(dwc, DAR);
+}
+EXPORT_SYMBOL(dw_dma_get_dst_addr);
+
+/* called with dwc->lock held and all DMAC interrupts disabled */
+static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc,
+ u32 status_block, u32 status_err, u32 status_xfer)
+{
+ if (status_block & dwc->mask) {
+ void (*callback)(void *param);
+ void *callback_param;
+
+ dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n",
+ channel_readl(dwc, LLP));
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+
+ callback = dwc->cdesc->period_callback;
+ callback_param = dwc->cdesc->period_callback_param;
+ if (callback) {
+ spin_unlock(&dwc->lock);
+ callback(callback_param);
+ spin_lock(&dwc->lock);
+ }
+ }
+
+ /*
+ * Error and transfer complete are highly unlikely, and will most
+ * likely be due to a configuration error by the user.
+ */
+ if (unlikely(status_err & dwc->mask) ||
+ unlikely(status_xfer & dwc->mask)) {
+ int i;
+
+ dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s "
+ "interrupt, stopping DMA transfer\n",
+ status_xfer ? "xfer" : "error");
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ /* make sure DMA does not restart by loading a new list */
+ channel_writel(dwc, LLP, 0);
+ channel_writel(dwc, CTL_LO, 0);
+ channel_writel(dwc, CTL_HI, 0);
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ for (i = 0; i < dwc->cdesc->periods; i++)
+ dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli);
+ }
+}
+
+/* ------------------------------------------------------------------------- */
+
static void dw_dma_tasklet(unsigned long data)
{
struct dw_dma *dw = (struct dw_dma *)data;
@@ -382,7 +458,10 @@ static void dw_dma_tasklet(unsigned long data)
for (i = 0; i < dw->dma.chancnt; i++) {
dwc = &dw->chan[i];
spin_lock(&dwc->lock);
- if (status_err & (1 << i))
+ if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
+ dwc_handle_cyclic(dw, dwc, status_block, status_err,
+ status_xfer);
+ else if (status_err & (1 << i))
dwc_handle_error(dw, dwc);
else if ((status_block | status_xfer) & (1 << i))
dwc_scan_descriptors(dw, dwc);
@@ -826,7 +905,6 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = dwc_tx_submit;
desc->txd.flags = DMA_CTRL_ACK;
- INIT_LIST_HEAD(&desc->txd.tx_list);
desc->txd.phys = dma_map_single(chan2parent(chan), &desc->lli,
sizeof(desc->lli), DMA_TO_DEVICE);
dwc_desc_put(dwc, desc);
@@ -884,6 +962,257 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
}
+/* --------------------- Cyclic DMA API extensions -------------------- */
+
+/**
+ * dw_dma_cyclic_start - start the cyclic DMA transfer
+ * @chan: the DMA channel to start
+ *
+ * Must be called with soft interrupts disabled. Returns zero on success or
+ * -errno on failure.
+ */
+int dw_dma_cyclic_start(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) {
+ dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n");
+ return -ENODEV;
+ }
+
+ spin_lock(&dwc->lock);
+
+ /* assert channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(chan2dev(&dwc->chan),
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(chan2dev(&dwc->chan),
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+ spin_unlock(&dwc->lock);
+ return -EBUSY;
+ }
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ /* setup DMAC channel registers */
+ channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys);
+ channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+
+ channel_set_bit(dw, CH_EN, dwc->mask);
+
+ spin_unlock(&dwc->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_start);
+
+/**
+ * dw_dma_cyclic_stop - stop the cyclic DMA transfer
+ * @chan: the DMA channel to stop
+ *
+ * Must be called with soft interrupts disabled.
+ */
+void dw_dma_cyclic_stop(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ spin_lock(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ spin_unlock(&dwc->lock);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_stop);
+
+/**
+ * dw_dma_cyclic_prep - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ *
+ * Must be called before trying to start the transfer. Returns a valid struct
+ * dw_cyclic_desc if successful or an ERR_PTR(-errno) if not successful.
+ */
+struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan,
+ dma_addr_t buf_addr, size_t buf_len, size_t period_len,
+ enum dma_data_direction direction)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_cyclic_desc *cdesc;
+ struct dw_cyclic_desc *retval = NULL;
+ struct dw_desc *desc;
+ struct dw_desc *last = NULL;
+ struct dw_dma_slave *dws = chan->private;
+ unsigned long was_cyclic;
+ unsigned int reg_width;
+ unsigned int periods;
+ unsigned int i;
+
+ spin_lock_bh(&dwc->lock);
+ if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) {
+ spin_unlock_bh(&dwc->lock);
+ dev_dbg(chan2dev(&dwc->chan),
+ "queue and/or active list are not empty\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ spin_unlock_bh(&dwc->lock);
+ if (was_cyclic) {
+ dev_dbg(chan2dev(&dwc->chan),
+ "channel already prepared for cyclic DMA\n");
+ return ERR_PTR(-EBUSY);
+ }
+
+ retval = ERR_PTR(-EINVAL);
+ reg_width = dws->reg_width;
+ periods = buf_len / period_len;
+
+ /* Check for too big/unaligned periods and unaligned DMA buffer. */
+ if (period_len > (DWC_MAX_COUNT << reg_width))
+ goto out_err;
+ if (unlikely(period_len & ((1 << reg_width) - 1)))
+ goto out_err;
+ if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+ goto out_err;
+ if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE))))
+ goto out_err;
+
+ retval = ERR_PTR(-ENOMEM);
+
+ if (periods > NR_DESCS_PER_CHANNEL)
+ goto out_err;
+
+ cdesc = kzalloc(sizeof(struct dw_cyclic_desc), GFP_KERNEL);
+ if (!cdesc)
+ goto out_err;
+
+ cdesc->desc = kzalloc(sizeof(struct dw_desc *) * periods, GFP_KERNEL);
+ if (!cdesc->desc)
+ goto out_err_alloc;
+
+ for (i = 0; i < periods; i++) {
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto out_err_desc_get;
+
+ switch (direction) {
+ case DMA_TO_DEVICE:
+ desc->lli.dar = dws->tx_reg;
+ desc->lli.sar = buf_addr + (period_len * i);
+ desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_FIX
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2P
+ | DWC_CTLL_INT_EN);
+ break;
+ case DMA_FROM_DEVICE:
+ desc->lli.dar = buf_addr + (period_len * i);
+ desc->lli.sar = dws->rx_reg;
+ desc->lli.ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_FIX
+ | DWC_CTLL_FC_P2M
+ | DWC_CTLL_INT_EN);
+ break;
+ default:
+ break;
+ }
+
+ desc->lli.ctlhi = (period_len >> reg_width);
+ cdesc->desc[i] = desc;
+
+ if (last) {
+ last->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan),
+ last->txd.phys, sizeof(last->lli),
+ DMA_TO_DEVICE);
+ }
+
+ last = desc;
+ }
+
+ /* lets make a cyclic list */
+ last->lli.llp = cdesc->desc[0]->txd.phys;
+ dma_sync_single_for_device(chan2parent(chan), last->txd.phys,
+ sizeof(last->lli), DMA_TO_DEVICE);
+
+ dev_dbg(chan2dev(&dwc->chan), "cyclic prepared buf 0x%08x len %zu "
+ "period %zu periods %d\n", buf_addr, buf_len,
+ period_len, periods);
+
+ cdesc->periods = periods;
+ dwc->cdesc = cdesc;
+
+ return cdesc;
+
+out_err_desc_get:
+ while (i--)
+ dwc_desc_put(dwc, cdesc->desc[i]);
+out_err_alloc:
+ kfree(cdesc);
+out_err:
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+ return (struct dw_cyclic_desc *)retval;
+}
+EXPORT_SYMBOL(dw_dma_cyclic_prep);
+
+/**
+ * dw_dma_cyclic_free - free a prepared cyclic DMA transfer
+ * @chan: the DMA channel to free
+ */
+void dw_dma_cyclic_free(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+ struct dw_cyclic_desc *cdesc = dwc->cdesc;
+ int i;
+
+ dev_dbg(chan2dev(&dwc->chan), "cyclic free\n");
+
+ if (!cdesc)
+ return;
+
+ spin_lock_bh(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ for (i = 0; i < cdesc->periods; i++)
+ dwc_desc_put(dwc, cdesc->desc[i]);
+
+ kfree(cdesc->desc);
+ kfree(cdesc);
+
+ clear_bit(DW_DMA_IS_CYCLIC, &dwc->flags);
+}
+EXPORT_SYMBOL(dw_dma_cyclic_free);
+
/*----------------------------------------------------------------------*/
static void dw_dma_off(struct dw_dma *dw)
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
index b252b202c5c..13a58076703 100644
--- a/drivers/dma/dw_dmac_regs.h
+++ b/drivers/dma/dw_dmac_regs.h
@@ -126,6 +126,10 @@ struct dw_dma_regs {
#define DW_REGLEN 0x400
+enum dw_dmac_flags {
+ DW_DMA_IS_CYCLIC = 0,
+};
+
struct dw_dma_chan {
struct dma_chan chan;
void __iomem *ch_regs;
@@ -134,10 +138,12 @@ struct dw_dma_chan {
spinlock_t lock;
/* these other elements are all protected by lock */
+ unsigned long flags;
dma_cookie_t completed;
struct list_head active_list;
struct list_head queue;
struct list_head free_list;
+ struct dw_cyclic_desc *cdesc;
unsigned int descs_allocated;
};
@@ -158,7 +164,6 @@ static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
return container_of(chan, struct dw_dma_chan, chan);
}
-
struct dw_dma {
struct dma_device dma;
void __iomem *regs;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 86d6da47f55..da8a8ed9e41 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -354,7 +354,6 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
dma_async_tx_descriptor_init(&desc_sw->async_tx,
&fsl_chan->common);
desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
- INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
desc_sw->async_tx.phys = pdesc;
}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 5905cd36bcd..e4fc33c1c32 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -693,7 +693,6 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
desc_sw->async_tx.tx_submit = ioat2_tx_submit;
break;
}
- INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
desc_sw->hw = desc;
desc_sw->async_tx.phys = phys;
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 16adbe61cfb..2f052265122 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -498,7 +498,6 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
slot->async_tx.tx_submit = iop_adma_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
- INIT_LIST_HEAD(&slot->async_tx.tx_list);
hw_desc = (char *) iop_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index da781d10789..e202a6ce557 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -28,6 +28,9 @@
#define FS_VF_IN_VALID 0x00000002
#define FS_ENC_IN_VALID 0x00000001
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+ bool wait_for_stop);
+
/*
* There can be only one, we could allocate it dynamically, but then we'd have
* to add an extra parameter to some functions, and use something as ugly as
@@ -107,7 +110,7 @@ static uint32_t bytes_per_pixel(enum pixel_fmt fmt)
}
}
-/* Enable / disable direct write to memory by the Camera Sensor Interface */
+/* Enable direct write to memory by the Camera Sensor Interface */
static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
{
uint32_t ic_conf, mask;
@@ -126,6 +129,7 @@ static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
idmac_write_icreg(ipu, ic_conf, IC_CONF);
}
+/* Called under spin_lock_irqsave(&ipu_data.lock) */
static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel)
{
uint32_t ic_conf, mask;
@@ -422,7 +426,7 @@ static void ipu_ch_param_set_size(union chan_param_mem *params,
break;
default:
dev_err(ipu_data.dev,
- "mxc ipu: unimplemented pixel format %d\n", pixel_fmt);
+ "mx3 ipu: unimplemented pixel format %d\n", pixel_fmt);
break;
}
@@ -433,20 +437,20 @@ static void ipu_ch_param_set_burst_size(union chan_param_mem *params,
uint16_t burst_pixels)
{
params->pp.npb = burst_pixels - 1;
-};
+}
static void ipu_ch_param_set_buffer(union chan_param_mem *params,
dma_addr_t buf0, dma_addr_t buf1)
{
params->pp.eba0 = buf0;
params->pp.eba1 = buf1;
-};
+}
static void ipu_ch_param_set_rotation(union chan_param_mem *params,
enum ipu_rotate_mode rotate)
{
params->pp.bam = rotate;
-};
+}
static void ipu_write_param_mem(uint32_t addr, uint32_t *data,
uint32_t num_words)
@@ -571,7 +575,7 @@ static uint32_t dma_param_addr(uint32_t dma_ch)
{
/* Channel Parameter Memory */
return 0x10000 | (dma_ch << 4);
-};
+}
static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel,
bool prio)
@@ -611,7 +615,8 @@ static uint32_t ipu_channel_conf_mask(enum ipu_channel channel)
/**
* ipu_enable_channel() - enable an IPU channel.
- * @channel: channel ID.
+ * @idmac: IPU DMAC context.
+ * @ichan: IDMAC channel.
* @return: 0 on success or negative error code on failure.
*/
static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
@@ -649,7 +654,7 @@ static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
/**
* ipu_init_channel_buffer() - initialize a buffer for logical IPU channel.
- * @channel: channel ID.
+ * @ichan: IDMAC channel.
* @pixel_fmt: pixel format of buffer. Pixel format is a FOURCC ASCII code.
* @width: width of buffer in pixels.
* @height: height of buffer in pixels.
@@ -687,7 +692,7 @@ static int ipu_init_channel_buffer(struct idmac_channel *ichan,
}
/* IC channel's stride must be a multiple of 8 pixels */
- if ((channel <= 13) && (stride % 8)) {
+ if ((channel <= IDMAC_IC_13) && (stride % 8)) {
dev_err(ipu->dev, "Stride must be 8 pixel multiple\n");
return -EINVAL;
}
@@ -752,7 +757,7 @@ static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
/**
* ipu_update_channel_buffer() - update physical address of a channel buffer.
- * @channel: channel ID.
+ * @ichan: IDMAC channel.
* @buffer_n: buffer number to update.
* 0 or 1 are the only valid values.
* @phyaddr: buffer physical address.
@@ -760,9 +765,10 @@ static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
* function will fail if the buffer is set to ready.
*/
/* Called under spin_lock(_irqsave)(&ichan->lock) */
-static int ipu_update_channel_buffer(enum ipu_channel channel,
+static int ipu_update_channel_buffer(struct idmac_channel *ichan,
int buffer_n, dma_addr_t phyaddr)
{
+ enum ipu_channel channel = ichan->dma_chan.chan_id;
uint32_t reg;
unsigned long flags;
@@ -771,8 +777,8 @@ static int ipu_update_channel_buffer(enum ipu_channel channel,
if (buffer_n == 0) {
reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
if (reg & (1UL << channel)) {
- spin_unlock_irqrestore(&ipu_data.lock, flags);
- return -EACCES;
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
}
/* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */
@@ -782,8 +788,8 @@ static int ipu_update_channel_buffer(enum ipu_channel channel,
} else {
reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
if (reg & (1UL << channel)) {
- spin_unlock_irqrestore(&ipu_data.lock, flags);
- return -EACCES;
+ ipu_ic_disable_task(&ipu_data, channel);
+ ichan->status = IPU_CHANNEL_READY;
}
/* Check if double-buffering is already enabled */
@@ -805,6 +811,39 @@ static int ipu_update_channel_buffer(enum ipu_channel channel,
}
/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_buffer(struct idmac_channel *ichan,
+ struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx)
+{
+ unsigned int chan_id = ichan->dma_chan.chan_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
+ int ret;
+
+ if (async_tx_test_ack(&desc->txd))
+ return -EINTR;
+
+ /*
+ * On first invocation this shouldn't be necessary, the call to
+ * ipu_init_channel_buffer() above will set addresses for us, so we
+ * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
+ * doing it again shouldn't hurt either.
+ */
+ ret = ipu_update_channel_buffer(ichan, buf_idx,
+ sg_dma_address(sg));
+
+ if (ret < 0) {
+ dev_err(dev, "Updating sg %p on channel 0x%x buffer %d failed!\n",
+ sg, chan_id, buf_idx);
+ return ret;
+ }
+
+ ipu_select_buffer(chan_id, buf_idx);
+ dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
+ sg, chan_id, buf_idx);
+
+ return 0;
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
struct idmac_tx_desc *desc)
{
@@ -815,20 +854,10 @@ static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
if (!ichan->sg[i]) {
ichan->sg[i] = sg;
- /*
- * On first invocation this shouldn't be necessary, the
- * call to ipu_init_channel_buffer() above will set
- * addresses for us, so we could make it conditional
- * on status >= IPU_CHANNEL_ENABLED, but doing it again
- * shouldn't hurt either.
- */
- ret = ipu_update_channel_buffer(ichan->dma_chan.chan_id, i,
- sg_dma_address(sg));
+ ret = ipu_submit_buffer(ichan, desc, sg, i);
if (ret < 0)
return ret;
- ipu_select_buffer(ichan->dma_chan.chan_id, i);
-
sg = sg_next(sg);
}
}
@@ -842,19 +871,22 @@ static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
struct idmac_channel *ichan = to_idmac_chan(tx->chan);
struct idmac *idmac = to_idmac(tx->chan->device);
struct ipu *ipu = to_ipu(idmac);
+ struct device *dev = &ichan->dma_chan.dev->device;
dma_cookie_t cookie;
unsigned long flags;
+ int ret;
/* Sanity check */
if (!list_empty(&desc->list)) {
/* The descriptor doesn't belong to client */
- dev_err(&ichan->dma_chan.dev->device,
- "Descriptor %p not prepared!\n", tx);
+ dev_err(dev, "Descriptor %p not prepared!\n", tx);
return -EBUSY;
}
mutex_lock(&ichan->chan_mutex);
+ async_tx_clear_ack(tx);
+
if (ichan->status < IPU_CHANNEL_READY) {
struct idmac_video_param *video = &ichan->params.video;
/*
@@ -878,16 +910,7 @@ static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
goto out;
}
- /* ipu->lock can be taken under ichan->lock, but not v.v. */
- spin_lock_irqsave(&ichan->lock, flags);
-
- /* submit_buffers() atomically verifies and fills empty sg slots */
- cookie = ipu_submit_channel_buffers(ichan, desc);
-
- spin_unlock_irqrestore(&ichan->lock, flags);
-
- if (cookie < 0)
- goto out;
+ dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
cookie = ichan->dma_chan.cookie;
@@ -897,24 +920,40 @@ static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
/* from dmaengine.h: "last cookie value returned to client" */
ichan->dma_chan.cookie = cookie;
tx->cookie = cookie;
+
+ /* ipu->lock can be taken under ichan->lock, but not v.v. */
spin_lock_irqsave(&ichan->lock, flags);
+
list_add_tail(&desc->list, &ichan->queue);
+ /* submit_buffers() atomically verifies and fills empty sg slots */
+ ret = ipu_submit_channel_buffers(ichan, desc);
+
spin_unlock_irqrestore(&ichan->lock, flags);
+ if (ret < 0) {
+ cookie = ret;
+ goto dequeue;
+ }
+
if (ichan->status < IPU_CHANNEL_ENABLED) {
- int ret = ipu_enable_channel(idmac, ichan);
+ ret = ipu_enable_channel(idmac, ichan);
if (ret < 0) {
cookie = ret;
- spin_lock_irqsave(&ichan->lock, flags);
- list_del_init(&desc->list);
- spin_unlock_irqrestore(&ichan->lock, flags);
- tx->cookie = cookie;
- ichan->dma_chan.cookie = cookie;
+ goto dequeue;
}
}
dump_idmac_reg(ipu);
+dequeue:
+ if (cookie < 0) {
+ spin_lock_irqsave(&ichan->lock, flags);
+ list_del_init(&desc->list);
+ spin_unlock_irqrestore(&ichan->lock, flags);
+ tx->cookie = cookie;
+ ichan->dma_chan.cookie = cookie;
+ }
+
out:
mutex_unlock(&ichan->chan_mutex);
@@ -944,8 +983,6 @@ static int idmac_desc_alloc(struct idmac_channel *ichan, int n)
memset(txd, 0, sizeof(*txd));
dma_async_tx_descriptor_init(txd, &ichan->dma_chan);
txd->tx_submit = idmac_tx_submit;
- txd->chan = &ichan->dma_chan;
- INIT_LIST_HEAD(&txd->tx_list);
list_add(&desc->list, &ichan->free_list);
@@ -1161,6 +1198,24 @@ static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
return 0;
}
+static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan,
+ struct idmac_tx_desc **desc, struct scatterlist *sg)
+{
+ struct scatterlist *sgnew = sg ? sg_next(sg) : NULL;
+
+ if (sgnew)
+ /* next sg-element in this list */
+ return sgnew;
+
+ if ((*desc)->list.next == &ichan->queue)
+ /* No more descriptors on the queue */
+ return NULL;
+
+ /* Fetch next descriptor */
+ *desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list);
+ return (*desc)->sg;
+}
+
/*
* We have several possibilities here:
* current BUF next BUF
@@ -1176,23 +1231,46 @@ static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
static irqreturn_t idmac_interrupt(int irq, void *dev_id)
{
struct idmac_channel *ichan = dev_id;
+ struct device *dev = &ichan->dma_chan.dev->device;
unsigned int chan_id = ichan->dma_chan.chan_id;
struct scatterlist **sg, *sgnext, *sgnew = NULL;
/* Next transfer descriptor */
- struct idmac_tx_desc *desc = NULL, *descnew;
+ struct idmac_tx_desc *desc, *descnew;
dma_async_tx_callback callback;
void *callback_param;
bool done = false;
- u32 ready0 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY),
- ready1 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY),
- curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+ u32 ready0, ready1, curbuf, err;
+ unsigned long flags;
/* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */
- pr_debug("IDMAC irq %d\n", irq);
+ dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer);
+
+ spin_lock_irqsave(&ipu_data.lock, flags);
+
+ ready0 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+ ready1 = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+ curbuf = idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+ err = idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4);
+
+ if (err & (1 << chan_id)) {
+ idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4);
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+ /*
+ * Doing this
+ * ichan->sg[0] = ichan->sg[1] = NULL;
+ * you can force channel re-enable on the next tx_submit(), but
+ * this is dirty - think about descriptors with multiple
+ * sg elements.
+ */
+ dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n",
+ chan_id, ready0, ready1, curbuf);
+ return IRQ_HANDLED;
+ }
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
+
/* Other interrupts do not interfere with this channel */
spin_lock(&ichan->lock);
-
if (unlikely(chan_id != IDMAC_SDC_0 && chan_id != IDMAC_SDC_1 &&
((curbuf >> chan_id) & 1) == ichan->active_buffer)) {
int i = 100;
@@ -1207,19 +1285,23 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
if (!i) {
spin_unlock(&ichan->lock);
- dev_dbg(ichan->dma_chan.device->dev,
+ dev_dbg(dev,
"IRQ on active buffer on channel %x, active "
"%d, ready %x, %x, current %x!\n", chan_id,
ichan->active_buffer, ready0, ready1, curbuf);
return IRQ_NONE;
- }
+ } else
+ dev_dbg(dev,
+ "Buffer deactivated on channel %x, active "
+ "%d, ready %x, %x, current %x, rest %d!\n", chan_id,
+ ichan->active_buffer, ready0, ready1, curbuf, i);
}
if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) ||
(!ichan->active_buffer && (ready0 >> chan_id) & 1)
)) {
spin_unlock(&ichan->lock);
- dev_dbg(ichan->dma_chan.device->dev,
+ dev_dbg(dev,
"IRQ with active buffer still ready on channel %x, "
"active %d, ready %x, %x!\n", chan_id,
ichan->active_buffer, ready0, ready1);
@@ -1227,8 +1309,9 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
}
if (unlikely(list_empty(&ichan->queue))) {
+ ichan->sg[ichan->active_buffer] = NULL;
spin_unlock(&ichan->lock);
- dev_err(ichan->dma_chan.device->dev,
+ dev_err(dev,
"IRQ without queued buffers on channel %x, active %d, "
"ready %x, %x!\n", chan_id,
ichan->active_buffer, ready0, ready1);
@@ -1243,40 +1326,44 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
sg = &ichan->sg[ichan->active_buffer];
sgnext = ichan->sg[!ichan->active_buffer];
+ if (!*sg) {
+ spin_unlock(&ichan->lock);
+ return IRQ_HANDLED;
+ }
+
+ desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
+ descnew = desc;
+
+ dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
+ irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+
+ /* Find the descriptor of sgnext */
+ sgnew = idmac_sg_next(ichan, &descnew, *sg);
+ if (sgnext != sgnew)
+ dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew);
+
/*
* if sgnext == NULL sg must be the last element in a scatterlist and
* queue must be empty
*/
if (unlikely(!sgnext)) {
- if (unlikely(sg_next(*sg))) {
- dev_err(ichan->dma_chan.device->dev,
- "Broken buffer-update locking on channel %x!\n",
- chan_id);
- /* We'll let the user catch up */
+ if (!WARN_ON(sg_next(*sg)))
+ dev_dbg(dev, "Underrun on channel %x\n", chan_id);
+ ichan->sg[!ichan->active_buffer] = sgnew;
+
+ if (unlikely(sgnew)) {
+ ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer);
} else {
- /* Underrun */
+ spin_lock_irqsave(&ipu_data.lock, flags);
ipu_ic_disable_task(&ipu_data, chan_id);
- dev_dbg(ichan->dma_chan.device->dev,
- "Underrun on channel %x\n", chan_id);
+ spin_unlock_irqrestore(&ipu_data.lock, flags);
ichan->status = IPU_CHANNEL_READY;
/* Continue to check for complete descriptor */
}
}
- desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
-
- /* First calculate and submit the next sg element */
- if (likely(sgnext))
- sgnew = sg_next(sgnext);
-
- if (unlikely(!sgnew)) {
- /* Start a new scatterlist, if any queued */
- if (likely(desc->list.next != &ichan->queue)) {
- descnew = list_entry(desc->list.next,
- struct idmac_tx_desc, list);
- sgnew = &descnew->sg[0];
- }
- }
+ /* Calculate and submit the next sg element */
+ sgnew = idmac_sg_next(ichan, &descnew, sgnew);
if (unlikely(!sg_next(*sg)) || !sgnext) {
/*
@@ -1289,17 +1376,13 @@ static irqreturn_t idmac_interrupt(int irq, void *dev_id)
*sg = sgnew;
- if (likely(sgnew)) {
- int ret;
-
- ret = ipu_update_channel_buffer(chan_id, ichan->active_buffer,
- sg_dma_address(*sg));
- if (ret < 0)
- dev_err(ichan->dma_chan.device->dev,
- "Failed to update buffer on channel %x buffer %d!\n",
- chan_id, ichan->active_buffer);
- else
- ipu_select_buffer(chan_id, ichan->active_buffer);
+ if (likely(sgnew) &&
+ ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
+ callback = desc->txd.callback;
+ callback_param = desc->txd.callback_param;
+ spin_unlock(&ichan->lock);
+ callback(callback_param);
+ spin_lock(&ichan->lock);
}
/* Flip the active buffer - even if update above failed */
@@ -1327,13 +1410,20 @@ static void ipu_gc_tasklet(unsigned long arg)
struct idmac_channel *ichan = ipu->channel + i;
struct idmac_tx_desc *desc;
unsigned long flags;
- int j;
+ struct scatterlist *sg;
+ int j, k;
for (j = 0; j < ichan->n_tx_desc; j++) {
desc = ichan->desc + j;
spin_lock_irqsave(&ichan->lock, flags);
if (async_tx_test_ack(&desc->txd)) {
list_move(&desc->list, &ichan->free_list);
+ for_each_sg(desc->sg, sg, desc->sg_len, k) {
+ if (ichan->sg[0] == sg)
+ ichan->sg[0] = NULL;
+ else if (ichan->sg[1] == sg)
+ ichan->sg[1] = NULL;
+ }
async_tx_clear_ack(&desc->txd);
}
spin_unlock_irqrestore(&ichan->lock, flags);
@@ -1341,13 +1431,7 @@ static void ipu_gc_tasklet(unsigned long arg)
}
}
-/*
- * At the time .device_alloc_chan_resources() method is called, we cannot know,
- * whether the client will accept the channel. Thus we must only check, if we
- * can satisfy client's request but the only real criterion to verify, whether
- * the client has accepted our offer is the client_count. That's why we have to
- * perform the rest of our allocation tasks on the first call to this function.
- */
+/* Allocate and initialise a transfer descriptor. */
static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
struct scatterlist *sgl, unsigned int sg_len,
enum dma_data_direction direction, unsigned long tx_flags)
@@ -1358,8 +1442,8 @@ static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan
unsigned long flags;
/* We only can handle these three channels so far */
- if (ichan->dma_chan.chan_id != IDMAC_SDC_0 && ichan->dma_chan.chan_id != IDMAC_SDC_1 &&
- ichan->dma_chan.chan_id != IDMAC_IC_7)
+ if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 &&
+ chan->chan_id != IDMAC_IC_7)
return NULL;
if (direction != DMA_FROM_DEVICE && direction != DMA_TO_DEVICE) {
@@ -1400,7 +1484,7 @@ static void idmac_issue_pending(struct dma_chan *chan)
/* This is not always needed, but doesn't hurt either */
spin_lock_irqsave(&ipu->lock, flags);
- ipu_select_buffer(ichan->dma_chan.chan_id, ichan->active_buffer);
+ ipu_select_buffer(chan->chan_id, ichan->active_buffer);
spin_unlock_irqrestore(&ipu->lock, flags);
/*
@@ -1432,8 +1516,7 @@ static void __idmac_terminate_all(struct dma_chan *chan)
struct idmac_tx_desc *desc = ichan->desc + i;
if (list_empty(&desc->list))
/* Descriptor was prepared, but not submitted */
- list_add(&desc->list,
- &ichan->free_list);
+ list_add(&desc->list, &ichan->free_list);
async_tx_clear_ack(&desc->txd);
}
@@ -1458,6 +1541,28 @@ static void idmac_terminate_all(struct dma_chan *chan)
mutex_unlock(&ichan->chan_mutex);
}
+#ifdef DEBUG
+static irqreturn_t ic_sof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq(irq);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ic_eof_irq(int irq, void *dev_id)
+{
+ struct idmac_channel *ichan = dev_id;
+ printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n",
+ irq, ichan->dma_chan.chan_id);
+ disable_irq(irq);
+ return IRQ_HANDLED;
+}
+
+static int ic_sof = -EINVAL, ic_eof = -EINVAL;
+#endif
+
static int idmac_alloc_chan_resources(struct dma_chan *chan)
{
struct idmac_channel *ichan = to_idmac_chan(chan);
@@ -1471,31 +1576,49 @@ static int idmac_alloc_chan_resources(struct dma_chan *chan)
chan->cookie = 1;
ichan->completed = -ENXIO;
- ret = ipu_irq_map(ichan->dma_chan.chan_id);
+ ret = ipu_irq_map(chan->chan_id);
if (ret < 0)
goto eimap;
ichan->eof_irq = ret;
+
+ /*
+ * Important to first disable the channel, because maybe someone
+ * used it before us, e.g., the bootloader
+ */
+ ipu_disable_channel(idmac, ichan, true);
+
+ ret = ipu_init_channel(idmac, ichan);
+ if (ret < 0)
+ goto eichan;
+
ret = request_irq(ichan->eof_irq, idmac_interrupt, 0,
ichan->eof_name, ichan);
if (ret < 0)
goto erirq;
- ret = ipu_init_channel(idmac, ichan);
- if (ret < 0)
- goto eichan;
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ ic_sof = ipu_irq_map(69);
+ if (ic_sof > 0)
+ request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+ ic_eof = ipu_irq_map(70);
+ if (ic_eof > 0)
+ request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+ }
+#endif
ichan->status = IPU_CHANNEL_INITIALIZED;
- dev_dbg(&ichan->dma_chan.dev->device, "Found channel 0x%x, irq %d\n",
- ichan->dma_chan.chan_id, ichan->eof_irq);
+ dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n",
+ chan->chan_id, ichan->eof_irq);
return ret;
-eichan:
- free_irq(ichan->eof_irq, ichan);
erirq:
- ipu_irq_unmap(ichan->dma_chan.chan_id);
+ ipu_uninit_channel(idmac, ichan);
+eichan:
+ ipu_irq_unmap(chan->chan_id);
eimap:
return ret;
}
@@ -1510,8 +1633,22 @@ static void idmac_free_chan_resources(struct dma_chan *chan)
__idmac_terminate_all(chan);
if (ichan->status > IPU_CHANNEL_FREE) {
+#ifdef DEBUG
+ if (chan->chan_id == IDMAC_IC_7) {
+ if (ic_sof > 0) {
+ free_irq(ic_sof, ichan);
+ ipu_irq_unmap(69);
+ ic_sof = -EINVAL;
+ }
+ if (ic_eof > 0) {
+ free_irq(ic_eof, ichan);
+ ipu_irq_unmap(70);
+ ic_eof = -EINVAL;
+ }
+ }
+#endif
free_irq(ichan->eof_irq, ichan);
- ipu_irq_unmap(ichan->dma_chan.chan_id);
+ ipu_irq_unmap(chan->chan_id);
}
ichan->status = IPU_CHANNEL_FREE;
@@ -1573,7 +1710,7 @@ static int __init ipu_idmac_init(struct ipu *ipu)
dma_chan->device = &idmac->dma;
dma_chan->cookie = 1;
dma_chan->chan_id = i;
- list_add_tail(&ichan->dma_chan.device_node, &dma->channels);
+ list_add_tail(&dma_chan->device_node, &dma->channels);
}
idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF);
@@ -1581,7 +1718,7 @@ static int __init ipu_idmac_init(struct ipu *ipu)
return dma_async_device_register(&idmac->dma);
}
-static void ipu_idmac_exit(struct ipu *ipu)
+static void __exit ipu_idmac_exit(struct ipu *ipu)
{
int i;
struct idmac *idmac = &ipu->idmac;
@@ -1600,7 +1737,7 @@ static void ipu_idmac_exit(struct ipu *ipu)
* IPU common probe / remove
*/
-static int ipu_probe(struct platform_device *pdev)
+static int __init ipu_probe(struct platform_device *pdev)
{
struct ipu_platform_data *pdata = pdev->dev.platform_data;
struct resource *mem_ipu, *mem_ic;
@@ -1700,7 +1837,7 @@ err_noirq:
return ret;
}
-static int ipu_remove(struct platform_device *pdev)
+static int __exit ipu_remove(struct platform_device *pdev)
{
struct ipu *ipu = platform_get_drvdata(pdev);
@@ -1725,7 +1862,7 @@ static struct platform_driver ipu_platform_driver = {
.name = "ipu-core",
.owner = THIS_MODULE,
},
- .remove = ipu_remove,
+ .remove = __exit_p(ipu_remove),
};
static int __init ipu_init(void)
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index 83f532cc767..dd8ebc75b66 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -352,7 +352,7 @@ static struct irq_chip ipu_irq_chip = {
};
/* Install the IRQ handler */
-int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
+int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
{
struct ipu_platform_data *pdata = dev->dev.platform_data;
unsigned int irq, irq_base, i;
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index cb7f26fb9f1..ddab94f5122 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -632,7 +632,6 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
slot->async_tx.tx_submit = mv_xor_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
- INIT_LIST_HEAD(&slot->async_tx.tx_list);
hw_desc = (char *) mv_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2281b5098e9..36e0675be9f 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -121,6 +121,7 @@ config MD_RAID10
config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
+ select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
---help---
@@ -151,34 +152,8 @@ config MD_RAID456
If unsure, say Y.
-config MD_RAID5_RESHAPE
- bool "Support adding drives to a raid-5 array"
- depends on MD_RAID456
- default y
- ---help---
- A RAID-5 set can be expanded by adding extra drives. This
- requires "restriping" the array which means (almost) every
- block must be written to a different place.
-
- This option allows such restriping to be done while the array
- is online.
-
- You will need mdadm version 2.4.1 or later to use this
- feature safely. During the early stage of reshape there is
- a critical section where live data is being over-written. A
- crash during this time needs extra care for recovery. The
- newer mdadm takes a copy of the data in the critical section
- and will restore it, if necessary, after a crash.
-
- The mdadm usage is e.g.
- mdadm --grow /dev/md1 --raid-disks=6
- to grow '/dev/md1' to having 6 disks.
-
- Note: The array can only be expanded, not contracted.
- There should be enough spares already present to make the new
- array workable.
-
- If unsure, say Y.
+config MD_RAID6_PQ
+ tristate
config MD_MULTIPATH
tristate "Multipath I/O support"
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 72880b7e28d..45cc5951d92 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -2,20 +2,21 @@
# Makefile for the kernel software RAID and LVM drivers.
#
-dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
+dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
-dm-multipath-objs := dm-path-selector.o dm-mpath.o
-dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
+dm-multipath-y += dm-path-selector.o dm-mpath.o
+dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
-dm-mirror-objs := dm-raid1.o
-md-mod-objs := md.o bitmap.o
-raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
+dm-mirror-y += dm-raid1.o
+md-mod-y += md.o bitmap.o
+raid456-y += raid5.o
+raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
raid6int8.o raid6int16.o raid6int32.o \
raid6altivec1.o raid6altivec2.o raid6altivec4.o \
raid6altivec8.o \
raid6mmx.o raid6sse1.o raid6sse2.o
-hostprogs-y := mktables
+hostprogs-y += mktables
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
+obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
obj-$(CONFIG_MD_RAID456) += raid456.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 71994376339..f8a9f7ab2cb 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -16,6 +16,7 @@
* wait if count gets too high, wake when it drops to half.
*/
+#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
@@ -26,8 +27,8 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include "md.h"
+#include "bitmap.h"
/* debug macros */
@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
unsigned char *mappage;
if (page >= bitmap->pages) {
- printk(KERN_ALERT
- "%s: invalid bitmap page request: %lu (> %lu)\n",
- bmname(bitmap), page, bitmap->pages-1);
+ /* This can happen if bitmap_start_sync goes beyond
+ * End-of-device while looking for a whole page.
+ * It is harmless.
+ */
return -EINVAL;
}
@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
list_for_each_continue_rcu(pos, &mddev->disks) {
rdev = list_entry(pos, mdk_rdev_t, same_set);
if (rdev->raid_disk >= 0 &&
- test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) {
/* this is a usable devices */
atomic_inc(&rdev->nr_pending);
@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+ size/512 > 0)
/* bitmap runs in to metadata */
goto bad_alignment;
- if (rdev->data_offset + mddev->size*2
+ if (rdev->data_offset + mddev->dev_sectors
> rdev->sb_start + bitmap->offset)
/* data runs in to bitmap */
goto bad_alignment;
@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
reason = "unrecognized superblock version";
- else if (chunksize < PAGE_SIZE)
+ else if (chunksize < 512)
reason = "bitmap chunksize too small";
else if ((1 << ffz(~chunksize)) != chunksize)
reason = "bitmap chunksize not a power of 2";
@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
}
+ if (bitmap->mddev->degraded)
+ /* Never clear bits or update events_cleared when degraded */
+ success = 0;
while (sectors) {
int blocks;
@@ -1345,8 +1349,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
}
}
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
- int degraded)
+static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+ int degraded)
{
bitmap_counter_t *bmc;
int rv;
@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
return rv;
}
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+ int degraded)
+{
+ /* bitmap_start_sync must always report on multiples of whole
+ * pages, otherwise resync (which is very PAGE_SIZE based) will
+ * get confused.
+ * So call __bitmap_start_sync repeatedly (if needed) until
+ * At least PAGE_SIZE>>9 blocks are covered.
+ * Return the 'or' of the result.
+ */
+ int rv = 0;
+ int blocks1;
+
+ *blocks = 0;
+ while (*blocks < (PAGE_SIZE>>9)) {
+ rv |= __bitmap_start_sync(bitmap, offset,
+ &blocks1, degraded);
+ offset += blocks1;
+ *blocks += blocks1;
+ }
+ return rv;
+}
+
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
bitmap_counter_t *bmc;
@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0);
+ bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
+ set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
new file mode 100644
index 00000000000..e98900671ca
--- /dev/null
+++ b/drivers/md/bitmap.h
@@ -0,0 +1,288 @@
+/*
+ * bitmap.h: Copyright (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
+ *
+ * additions: Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.
+ */
+#ifndef BITMAP_H
+#define BITMAP_H 1
+
+#define BITMAP_MAJOR_LO 3
+/* version 4 insists the bitmap is in little-endian order
+ * with version 3, it is host-endian which is non-portable
+ */
+#define BITMAP_MAJOR_HI 4
+#define BITMAP_MAJOR_HOSTENDIAN 3
+
+#define BITMAP_MINOR 39
+
+/*
+ * in-memory bitmap:
+ *
+ * Use 16 bit block counters to track pending writes to each "chunk".
+ * The 2 high order bits are special-purpose, the first is a flag indicating
+ * whether a resync is needed. The second is a flag indicating whether a
+ * resync is active.
+ * This means that the counter is actually 14 bits:
+ *
+ * +--------+--------+------------------------------------------------+
+ * | resync | resync | counter |
+ * | needed | active | |
+ * | (0-1) | (0-1) | (0-16383) |
+ * +--------+--------+------------------------------------------------+
+ *
+ * The "resync needed" bit is set when:
+ * a '1' bit is read from storage at startup.
+ * a write request fails on some drives
+ * a resync is aborted on a chunk with 'resync active' set
+ * It is cleared (and resync-active set) when a resync starts across all drives
+ * of the chunk.
+ *
+ *
+ * The "resync active" bit is set when:
+ * a resync is started on all drives, and resync_needed is set.
+ * resync_needed will be cleared (as long as resync_active wasn't already set).
+ * It is cleared when a resync completes.
+ *
+ * The counter counts pending write requests, plus the on-disk bit.
+ * When the counter is '1' and the resync bits are clear, the on-disk
+ * bit can be cleared aswell, thus setting the counter to 0.
+ * When we set a bit, or in the counter (to start a write), if the fields is
+ * 0, we first set the disk bit and set the counter to 1.
+ *
+ * If the counter is 0, the on-disk bit is clear and the stipe is clean
+ * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * and sets the on-disk bit (lazily).
+ * If a periodic sweep find the counter at 2, it is decremented to 1.
+ * If the sweep find the counter at 1, the on-disk bit is cleared and the
+ * counter goes to zero.
+ *
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
+ * counters as a fallback when "page" memory cannot be allocated:
+ *
+ * Normal case (page memory allocated):
+ *
+ * page pointer (32-bit)
+ *
+ * [ ] ------+
+ * |
+ * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
+ * c1 c2 c2048
+ *
+ * Hijacked case (page memory allocation failed):
+ *
+ * hijacked page pointer (32-bit)
+ *
+ * [ ][ ] (no page memory allocated)
+ * counter #1 (16-bit) counter #2 (16-bit)
+ *
+ */
+
+#ifdef __KERNEL__
+
+#define PAGE_BITS (PAGE_SIZE << 3)
+#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
+
+typedef __u16 bitmap_counter_t;
+#define COUNTER_BITS 16
+#define COUNTER_BIT_SHIFT 4
+#define COUNTER_BYTE_RATIO (COUNTER_BITS / 8)
+#define COUNTER_BYTE_SHIFT (COUNTER_BIT_SHIFT - 3)
+
+#define NEEDED_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 1)))
+#define RESYNC_MASK ((bitmap_counter_t) (1 << (COUNTER_BITS - 2)))
+#define COUNTER_MAX ((bitmap_counter_t) RESYNC_MASK - 1)
+#define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK)
+#define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK)
+#define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX)
+
+/* how many counters per page? */
+#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
+/* same, except a shift value for more efficient bitops */
+#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
+/* same, except a mask value for more efficient bitops */
+#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
+
+#define BITMAP_BLOCK_SIZE 512
+#define BITMAP_BLOCK_SHIFT 9
+
+/* how many blocks per chunk? (this is variable) */
+#define CHUNK_BLOCK_RATIO(bitmap) ((bitmap)->chunksize >> BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_SHIFT(bitmap) ((bitmap)->chunkshift - BITMAP_BLOCK_SHIFT)
+#define CHUNK_BLOCK_MASK(bitmap) (CHUNK_BLOCK_RATIO(bitmap) - 1)
+
+/* when hijacked, the counters and bits represent even larger "chunks" */
+/* there will be 1024 chunks represented by each counter in the page pointers */
+#define PAGEPTR_BLOCK_RATIO(bitmap) \
+ (CHUNK_BLOCK_RATIO(bitmap) << PAGE_COUNTER_SHIFT >> 1)
+#define PAGEPTR_BLOCK_SHIFT(bitmap) \
+ (CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
+#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
+
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
+
+/* map chunks (bits) to file pages - offset by the size of the superblock */
+#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
+
+#endif
+
+/*
+ * bitmap structures:
+ */
+
+#define BITMAP_MAGIC 0x6d746962
+
+/* use these for bitmap->flags and bitmap->sb->state bit-fields */
+enum bitmap_state {
+ BITMAP_STALE = 0x002, /* the bitmap file is out of date or had -EIO */
+ BITMAP_WRITE_ERROR = 0x004, /* A write error has occurred */
+ BITMAP_HOSTENDIAN = 0x8000,
+};
+
+/* the superblock at the front of the bitmap file -- little endian */
+typedef struct bitmap_super_s {
+ __le32 magic; /* 0 BITMAP_MAGIC */
+ __le32 version; /* 4 the bitmap major for now, could change... */
+ __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */
+ __le64 events; /* 24 event counter for the bitmap (1)*/
+ __le64 events_cleared;/*32 event counter when last bit cleared (2) */
+ __le64 sync_size; /* 40 the size of the md device's sync range(3) */
+ __le32 state; /* 48 bitmap state information */
+ __le32 chunksize; /* 52 the bitmap chunk size in bytes */
+ __le32 daemon_sleep; /* 56 seconds between disk flushes */
+ __le32 write_behind; /* 60 number of outstanding write-behind writes */
+
+ __u8 pad[256 - 64]; /* set to zero */
+} bitmap_super_t;
+
+/* notes:
+ * (1) This event counter is updated before the eventcounter in the md superblock
+ * When a bitmap is loaded, it is only accepted if this event counter is equal
+ * to, or one greater than, the event counter in the superblock.
+ * (2) This event counter is updated when the other one is *if*and*only*if* the
+ * array is not degraded. As bits are not cleared when the array is degraded,
+ * this represents the last time that any bits were cleared.
+ * If a device is being added that has an event count with this value or
+ * higher, it is accepted as conforming to the bitmap.
+ * (3)This is the number of sectors represented by the bitmap, and is the range that
+ * resync happens across. For raid1 and raid5/6 it is the size of individual
+ * devices. For raid10 it is the size of the array.
+ */
+
+#ifdef __KERNEL__
+
+/* the in-memory bitmap is represented by bitmap_pages */
+struct bitmap_page {
+ /*
+ * map points to the actual memory page
+ */
+ char *map;
+ /*
+ * in emergencies (when map cannot be alloced), hijack the map
+ * pointer and use it as two counters itself
+ */
+ unsigned int hijacked:1;
+ /*
+ * count of dirty bits on the page
+ */
+ unsigned int count:31;
+};
+
+/* keep track of bitmap file pages that have pending writes on them */
+struct page_list {
+ struct list_head list;
+ struct page *page;
+};
+
+/* the main bitmap structure - one per mddev */
+struct bitmap {
+ struct bitmap_page *bp;
+ unsigned long pages; /* total number of pages in the bitmap */
+ unsigned long missing_pages; /* number of pages not yet allocated */
+
+ mddev_t *mddev; /* the md device that the bitmap is for */
+
+ int counter_bits; /* how many bits per block counter */
+
+ /* bitmap chunksize -- how much data does each bit represent? */
+ unsigned long chunksize;
+ unsigned long chunkshift; /* chunksize = 2^chunkshift (for bitops) */
+ unsigned long chunks; /* total number of data chunks for the array */
+
+ /* We hold a count on the chunk currently being synced, and drop
+ * it when the last block is started. If the resync is aborted
+ * midway, we need to be able to drop that count, so we remember
+ * the counted chunk..
+ */
+ unsigned long syncchunk;
+
+ __u64 events_cleared;
+ int need_sync;
+
+ /* bitmap spinlock */
+ spinlock_t lock;
+
+ long offset; /* offset from superblock if file is NULL */
+ struct file *file; /* backing disk file */
+ struct page *sb_page; /* cached copy of the bitmap file superblock */
+ struct page **filemap; /* list of cache pages for the file */
+ unsigned long *filemap_attr; /* attributes associated w/ filemap pages */
+ unsigned long file_pages; /* number of pages in the file */
+ int last_page_size; /* bytes in the last page */
+
+ unsigned long flags;
+
+ int allclean;
+
+ unsigned long max_write_behind; /* write-behind mode */
+ atomic_t behind_writes;
+
+ /*
+ * the bitmap daemon - periodically wakes up and sweeps the bitmap
+ * file, cleaning up bits and flushing out pages to disk as necessary
+ */
+ unsigned long daemon_lastrun; /* jiffies of last run */
+ unsigned long daemon_sleep; /* how many seconds between updates? */
+ unsigned long last_end_sync; /* when we lasted called end_sync to
+ * update bitmap with resync progress */
+
+ atomic_t pending_writes; /* pending writes to the bitmap file */
+ wait_queue_head_t write_wait;
+ wait_queue_head_t overflow_wait;
+
+};
+
+/* the bitmap API */
+
+/* these are used only by md/bitmap */
+int bitmap_create(mddev_t *mddev);
+void bitmap_flush(mddev_t *mddev);
+void bitmap_destroy(mddev_t *mddev);
+
+void bitmap_print_sb(struct bitmap *bitmap);
+void bitmap_update_sb(struct bitmap *bitmap);
+
+int bitmap_setallbits(struct bitmap *bitmap);
+void bitmap_write_all(struct bitmap *bitmap);
+
+void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e);
+
+/* these are exported */
+int bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
+ unsigned long sectors, int behind);
+void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
+ unsigned long sectors, int success, int behind);
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int degraded);
+void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted);
+void bitmap_close_sync(struct bitmap *bitmap);
+void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
+
+void bitmap_unplug(struct bitmap *bitmap);
+void bitmap_daemon_work(struct bitmap *bitmap);
+#endif
+
+#endif
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index d4509be0fe6..345098b4ca7 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
bl->tail = bio;
}
+static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
+{
+ bio->bi_next = bl->head;
+
+ bl->head = bio;
+
+ if (!bl->tail)
+ bl->tail = bio;
+}
+
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index d3ec217847d..3a8cfa2645c 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -16,30 +16,56 @@
* functions in this file help the target record and restore the
* original bio state.
*/
+
+struct dm_bio_vec_details {
+#if PAGE_SIZE < 65536
+ __u16 bv_len;
+ __u16 bv_offset;
+#else
+ unsigned bv_len;
+ unsigned bv_offset;
+#endif
+};
+
struct dm_bio_details {
sector_t bi_sector;
struct block_device *bi_bdev;
unsigned int bi_size;
unsigned short bi_idx;
unsigned long bi_flags;
+ struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
{
+ unsigned i;
+
bd->bi_sector = bio->bi_sector;
bd->bi_bdev = bio->bi_bdev;
bd->bi_size = bio->bi_size;
bd->bi_idx = bio->bi_idx;
bd->bi_flags = bio->bi_flags;
+
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
+ bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
+ }
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
{
+ unsigned i;
+
bio->bi_sector = bd->bi_sector;
bio->bi_bdev = bd->bi_bdev;
bio->bi_size = bd->bi_size;
bio->bi_idx = bd->bi_idx;
bio->bi_flags = bd->bi_flags;
+
+ for (i = 0; i < bio->bi_vcnt; i++) {
+ bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
+ bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
+ }
}
#endif
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bfefd079a95..53394e863c7 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1156,8 +1156,7 @@ bad_ivmode:
crypto_free_ablkcipher(tfm);
bad_cipher:
/* Must zero key material before freeing */
- memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
- kfree(cc);
+ kzfree(cc);
return -EINVAL;
}
@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
dm_put_device(ti, cc->dev);
/* Must zero key material before freeing */
- memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
- kfree(cc);
+ kzfree(cc);
}
static int crypt_map(struct dm_target *ti, struct bio *bio,
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index dccbfb0e010..a2e26c24214 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -7,6 +7,7 @@
#include "dm-exception-store.h"
+#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@@ -14,6 +15,257 @@
#define DM_MSG_PREFIX "snapshot exception stores"
+static LIST_HEAD(_exception_store_types);
+static DEFINE_SPINLOCK(_lock);
+
+static struct dm_exception_store_type *__find_exception_store_type(const char *name)
+{
+ struct dm_exception_store_type *type;
+
+ list_for_each_entry(type, &_exception_store_types, list)
+ if (!strcmp(name, type->name))
+ return type;
+
+ return NULL;
+}
+
+static struct dm_exception_store_type *_get_exception_store_type(const char *name)
+{
+ struct dm_exception_store_type *type;
+
+ spin_lock(&_lock);
+
+ type = __find_exception_store_type(name);
+
+ if (type && !try_module_get(type->module))
+ type = NULL;
+
+ spin_unlock(&_lock);
+
+ return type;
+}
+
+/*
+ * get_type
+ * @type_name
+ *
+ * Attempt to retrieve the dm_exception_store_type by name. If not already
+ * available, attempt to load the appropriate module.
+ *
+ * Exstore modules are named "dm-exstore-" followed by the 'type_name'.
+ * Modules may contain multiple types.
+ * This function will first try the module "dm-exstore-<type_name>",
+ * then truncate 'type_name' on the last '-' and try again.
+ *
+ * For example, if type_name was "clustered-shared", it would search
+ * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
+ *
+ * 'dm-exception-store-<type_name>' is too long of a name in my
+ * opinion, which is why I've chosen to have the files
+ * containing exception store implementations be 'dm-exstore-<type_name>'.
+ * If you want your module to be autoloaded, you will follow this
+ * naming convention.
+ *
+ * Returns: dm_exception_store_type* on success, NULL on failure
+ */
+static struct dm_exception_store_type *get_type(const char *type_name)
+{
+ char *p, *type_name_dup;
+ struct dm_exception_store_type *type;
+
+ type = _get_exception_store_type(type_name);
+ if (type)
+ return type;
+
+ type_name_dup = kstrdup(type_name, GFP_KERNEL);
+ if (!type_name_dup) {
+ DMERR("No memory left to attempt load for \"%s\"", type_name);
+ return NULL;
+ }
+
+ while (request_module("dm-exstore-%s", type_name_dup) ||
+ !(type = _get_exception_store_type(type_name))) {
+ p = strrchr(type_name_dup, '-');
+ if (!p)
+ break;
+ p[0] = '\0';
+ }
+
+ if (!type)
+ DMWARN("Module for exstore type \"%s\" not found.", type_name);
+
+ kfree(type_name_dup);
+
+ return type;
+}
+
+static void put_type(struct dm_exception_store_type *type)
+{
+ spin_lock(&_lock);
+ module_put(type->module);
+ spin_unlock(&_lock);
+}
+
+int dm_exception_store_type_register(struct dm_exception_store_type *type)
+{
+ int r = 0;
+
+ spin_lock(&_lock);
+ if (!__find_exception_store_type(type->name))
+ list_add(&type->list, &_exception_store_types);
+ else
+ r = -EEXIST;
+ spin_unlock(&_lock);
+
+ return r;
+}
+EXPORT_SYMBOL(dm_exception_store_type_register);
+
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
+{
+ spin_lock(&_lock);
+
+ if (!__find_exception_store_type(type->name)) {
+ spin_unlock(&_lock);
+ return -EINVAL;
+ }
+
+ list_del(&type->list);
+
+ spin_unlock(&_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(dm_exception_store_type_unregister);
+
+/*
+ * Round a number up to the nearest 'size' boundary. size must
+ * be a power of 2.
+ */
+static ulong round_up(ulong n, ulong size)
+{
+ size--;
+ return (n + size) & ~size;
+}
+
+static int set_chunk_size(struct dm_exception_store *store,
+ const char *chunk_size_arg, char **error)
+{
+ unsigned long chunk_size_ulong;
+ char *value;
+
+ chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
+ if (*chunk_size_arg == '\0' || *value != '\0') {
+ *error = "Invalid chunk size";
+ return -EINVAL;
+ }
+
+ if (!chunk_size_ulong) {
+ store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
+ return 0;
+ }
+
+ /*
+ * Chunk size must be multiple of page size. Silently
+ * round up if it's not.
+ */
+ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
+
+ /* Check chunk_size is a power of 2 */
+ if (!is_power_of_2(chunk_size_ulong)) {
+ *error = "Chunk size is not a power of 2";
+ return -EINVAL;
+ }
+
+ /* Validate the chunk size against the device block size */
+ if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
+ *error = "Chunk size is not a multiple of device blocksize";
+ return -EINVAL;
+ }
+
+ store->chunk_size = chunk_size_ulong;
+ store->chunk_mask = chunk_size_ulong - 1;
+ store->chunk_shift = ffs(chunk_size_ulong) - 1;
+
+ return 0;
+}
+
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+ unsigned *args_used,
+ struct dm_exception_store **store)
+{
+ int r = 0;
+ struct dm_exception_store_type *type;
+ struct dm_exception_store *tmp_store;
+ char persistent;
+
+ if (argc < 3) {
+ ti->error = "Insufficient exception store arguments";
+ return -EINVAL;
+ }
+
+ tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
+ if (!tmp_store) {
+ ti->error = "Exception store allocation failed";
+ return -ENOMEM;
+ }
+
+ persistent = toupper(*argv[1]);
+ if (persistent != 'P' && persistent != 'N') {
+ ti->error = "Persistent flag is not P or N";
+ return -EINVAL;
+ }
+
+ type = get_type(argv[1]);
+ if (!type) {
+ ti->error = "Exception store type not recognised";
+ r = -EINVAL;
+ goto bad_type;
+ }
+
+ tmp_store->type = type;
+ tmp_store->ti = ti;
+
+ r = dm_get_device(ti, argv[0], 0, 0,
+ FMODE_READ | FMODE_WRITE, &tmp_store->cow);
+ if (r) {
+ ti->error = "Cannot get COW device";
+ goto bad_cow;
+ }
+
+ r = set_chunk_size(tmp_store, argv[2], &ti->error);
+ if (r)
+ goto bad_cow;
+
+ r = type->ctr(tmp_store, 0, NULL);
+ if (r) {
+ ti->error = "Exception store type constructor failed";
+ goto bad_ctr;
+ }
+
+ *args_used = 3;
+ *store = tmp_store;
+ return 0;
+
+bad_ctr:
+ dm_put_device(ti, tmp_store->cow);
+bad_cow:
+ put_type(type);
+bad_type:
+ kfree(tmp_store);
+ return r;
+}
+EXPORT_SYMBOL(dm_exception_store_create);
+
+void dm_exception_store_destroy(struct dm_exception_store *store)
+{
+ store->type->dtr(store);
+ dm_put_device(store->ti, store->cow);
+ put_type(store->type);
+ kfree(store);
+}
+EXPORT_SYMBOL(dm_exception_store_destroy);
+
int dm_exception_store_init(void)
{
int r;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb9f33d5daa..0a2e6e7f67b 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -37,11 +37,18 @@ struct dm_snap_exception {
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
-struct dm_exception_store {
+struct dm_exception_store;
+struct dm_exception_store_type {
+ const char *name;
+ struct module *module;
+
+ int (*ctr) (struct dm_exception_store *store,
+ unsigned argc, char **argv);
+
/*
* Destroys this object when you've finished with it.
*/
- void (*destroy) (struct dm_exception_store *store);
+ void (*dtr) (struct dm_exception_store *store);
/*
* The target shouldn't read the COW device until this is
@@ -72,8 +79,9 @@ struct dm_exception_store {
*/
void (*drop_snapshot) (struct dm_exception_store *store);
- int (*status) (struct dm_exception_store *store, status_type_t status,
- char *result, unsigned int maxlen);
+ unsigned (*status) (struct dm_exception_store *store,
+ status_type_t status, char *result,
+ unsigned maxlen);
/*
* Return how full the snapshot is.
@@ -82,7 +90,21 @@ struct dm_exception_store {
sector_t *numerator,
sector_t *denominator);
- struct dm_snapshot *snap;
+ /* For internal device-mapper use only. */
+ struct list_head list;
+};
+
+struct dm_exception_store {
+ struct dm_exception_store_type *type;
+ struct dm_target *ti;
+
+ struct dm_dev *cow;
+
+ /* Size of data blocks saved - must be a power of 2 */
+ chunk_t chunk_size;
+ chunk_t chunk_mask;
+ chunk_t chunk_shift;
+
void *context;
};
@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
# endif
+/*
+ * Return the number of sectors in the device.
+ */
+static inline sector_t get_dev_size(struct block_device *bdev)
+{
+ return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+}
+
+static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
+ sector_t sector)
+{
+ return (sector & ~store->chunk_mask) >> store->chunk_shift;
+}
+
+int dm_exception_store_type_register(struct dm_exception_store_type *type);
+int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
+
+int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+ unsigned *args_used,
+ struct dm_exception_store **store);
+void dm_exception_store_destroy(struct dm_exception_store *store);
+
int dm_exception_store_init(void);
void dm_exception_store_exit(void);
@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
int dm_transient_snapshot_init(void);
void dm_transient_snapshot_exit(void);
-int dm_create_persistent(struct dm_exception_store *store);
-
-int dm_create_transient(struct dm_exception_store *store);
-
#endif /* _LINUX_DM_EXCEPTION_STORE */
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 36e2b5e46a6..e73aabd61cd 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&io.count) || signal_pending(current))
+ if (!atomic_read(&io.count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
- if (atomic_read(&io.count))
- return -EINTR;
-
if (error_bits)
*error_bits = io.error_bits;
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 737961f275c..be233bc4d91 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -16,40 +16,29 @@
#define DM_MSG_PREFIX "dirty region log"
-struct dm_dirty_log_internal {
- struct dm_dirty_log_type *type;
-
- struct list_head list;
- long use;
-};
-
static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);
-static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
{
- struct dm_dirty_log_internal *log_type;
+ struct dm_dirty_log_type *log_type;
list_for_each_entry(log_type, &_log_types, list)
- if (!strcmp(name, log_type->type->name))
+ if (!strcmp(name, log_type->name))
return log_type;
return NULL;
}
-static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
+static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
{
- struct dm_dirty_log_internal *log_type;
+ struct dm_dirty_log_type *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(name);
- if (log_type) {
- if (!log_type->use && !try_module_get(log_type->type->module))
- log_type = NULL;
- else
- log_type->use++;
- }
+ if (log_type && !try_module_get(log_type->module))
+ log_type = NULL;
spin_unlock(&_lock);
@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
- struct dm_dirty_log_internal *log_type;
+ struct dm_dirty_log_type *log_type;
if (!type_name)
return NULL;
log_type = _get_dirty_log_type(type_name);
if (log_type)
- return log_type->type;
+ return log_type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
kfree(type_name_dup);
- return log_type ? log_type->type : NULL;
+ return log_type;
}
static void put_type(struct dm_dirty_log_type *type)
{
- struct dm_dirty_log_internal *log_type;
-
if (!type)
return;
spin_lock(&_lock);
- log_type = __find_dirty_log_type(type->name);
- if (!log_type)
+ if (!__find_dirty_log_type(type->name))
goto out;
- if (!--log_type->use)
- module_put(type->module);
-
- BUG_ON(log_type->use < 0);
+ module_put(type->module);
out:
spin_unlock(&_lock);
}
-static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
-{
- struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
- GFP_KERNEL);
-
- if (log_type)
- log_type->type = type;
-
- return log_type;
-}
-
int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
{
- struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
int r = 0;
- if (!log_type)
- return -ENOMEM;
-
spin_lock(&_lock);
if (!__find_dirty_log_type(type->name))
- list_add(&log_type->list, &_log_types);
- else {
- kfree(log_type);
+ list_add(&type->list, &_log_types);
+ else
r = -EEXIST;
- }
spin_unlock(&_lock);
return r;
@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
{
- struct dm_dirty_log_internal *log_type;
-
spin_lock(&_lock);
- log_type = __find_dirty_log_type(type->name);
- if (!log_type) {
+ if (!__find_dirty_log_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
- if (log_type->use) {
- spin_unlock(&_lock);
- return -ETXTBSY;
- }
-
- list_del(&log_type->list);
+ list_del(&type->list);
spin_unlock(&_lock);
- kfree(log_type);
return 0;
}
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index 96ea226155b..42c04f04a0c 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -17,9 +17,7 @@
struct ps_internal {
struct path_selector_type pst;
-
struct list_head list;
- long use;
};
#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
down_read(&_ps_lock);
psi = __find_path_selector_type(name);
- if (psi) {
- if ((psi->use == 0) && !try_module_get(psi->pst.module))
- psi = NULL;
- else
- psi->use++;
- }
+ if (psi && !try_module_get(psi->pst.module))
+ psi = NULL;
up_read(&_ps_lock);
return psi;
@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
if (!psi)
goto out;
- if (--psi->use == 0)
- module_put(psi->pst.module);
-
- BUG_ON(psi->use < 0);
-
+ module_put(psi->pst.module);
out:
up_read(&_ps_lock);
}
@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
return -EINVAL;
}
- if (psi->use) {
- up_write(&_ps_lock);
- return -ETXTBSY;
- }
-
list_del(&psi->list);
up_write(&_ps_lock);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4d6bc101962..536ef0bef15 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
struct dm_bio_details details;
};
+static struct kmem_cache *_dm_raid1_read_record_cache;
+
/*
* Every mirror should look like this one.
*/
@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
int state;
struct bio *bio;
struct bio_list sync, nosync, recover, *this_list = NULL;
+ struct bio_list requeue;
+ struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
+ region_t region;
if (!writes->head)
return;
@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_init(&sync);
bio_list_init(&nosync);
bio_list_init(&recover);
+ bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
- state = dm_rh_get_state(ms->rh,
- dm_rh_bio_to_region(ms->rh, bio), 1);
+ region = dm_rh_bio_to_region(ms->rh, bio);
+
+ if (log->type->is_remote_recovering &&
+ log->type->is_remote_recovering(log, region)) {
+ bio_list_add(&requeue, bio);
+ continue;
+ }
+
+ state = dm_rh_get_state(ms->rh, region, 1);
switch (state) {
case DM_RH_CLEAN:
case DM_RH_DIRTY:
@@ -619,6 +632,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
}
/*
+ * Add bios that are delayed due to remote recovery
+ * back on to the write queue
+ */
+ if (unlikely(requeue.head)) {
+ spin_lock_irq(&ms->lock);
+ bio_list_merge(&ms->writes, &requeue);
+ spin_unlock_irq(&ms->lock);
+ }
+
+ /*
* Increment the pending counts for any regions that will
* be written to (writes to recover regions are going to
* be delayed).
@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
- len = sizeof(struct dm_raid1_read_record);
- ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
- len);
+ ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
+ _dm_raid1_read_record_cache);
+
if (!ms->read_record_pool) {
ti->error = "Error creating mirror read_record_pool";
kfree(ms);
@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
{
int r;
+ _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
+ if (!_dm_raid1_read_record_cache) {
+ DMERR("Can't allocate dm_raid1_read_record cache");
+ r = -ENOMEM;
+ goto bad_cache;
+ }
+
r = dm_register_target(&mirror_target);
- if (r < 0)
+ if (r < 0) {
DMERR("Failed to register mirror target");
+ goto bad_target;
+ }
+
+ return 0;
+bad_target:
+ kmem_cache_destroy(_dm_raid1_read_record_cache);
+bad_cache:
return r;
}
static void __exit dm_mirror_exit(void)
{
dm_unregister_target(&mirror_target);
+ kmem_cache_destroy(_dm_raid1_read_record_cache);
}
/* Module hooks */
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 936b34e0959..e75c6dd76a9 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@@ -89,7 +88,7 @@ struct commit_callback {
* The top level structure for a persistent exception store.
*/
struct pstore {
- struct dm_snapshot *snap; /* up pointer to my snapshot */
+ struct dm_exception_store *store;
int version;
int valid;
uint32_t exceptions_per_area;
@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
int r = -ENOMEM;
size_t len;
- len = ps->snap->chunk_size << SECTOR_SHIFT;
+ len = ps->store->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
static void free_area(struct pstore *ps)
{
- vfree(ps->area);
+ if (ps->area)
+ vfree(ps->area);
ps->area = NULL;
- vfree(ps->zero_area);
+
+ if (ps->zero_area)
+ vfree(ps->zero_area);
ps->zero_area = NULL;
}
@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
- .bdev = ps->snap->cow->bdev,
- .sector = ps->snap->chunk_size * chunk,
- .count = ps->snap->chunk_size,
+ .bdev = ps->store->cow->bdev,
+ .sector = ps->store->chunk_size * chunk,
+ .count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
static void zero_memory_area(struct pstore *ps)
{
- memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+ memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
- .bdev = ps->snap->cow->bdev,
- .sector = ps->snap->chunk_size * area_location(ps, area),
- .count = ps->snap->chunk_size,
+ .bdev = ps->store->cow->bdev,
+ .sector = ps->store->chunk_size * area_location(ps, area),
+ .count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
- if (!ps->snap->chunk_size) {
- ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
- bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
- ps->snap->chunk_mask = ps->snap->chunk_size - 1;
- ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
+ if (!ps->store->chunk_size) {
+ ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
+ bdev_hardsect_size(ps->store->cow->bdev) >> 9);
+ ps->store->chunk_mask = ps->store->chunk_size - 1;
+ ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
chunk_size_supplied = 0;
}
- ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+ ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
- if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
+ if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
- (unsigned long long)ps->snap->chunk_size);
+ (unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
- ps->snap->chunk_size = chunk_size;
- ps->snap->chunk_mask = chunk_size - 1;
- ps->snap->chunk_shift = ffs(chunk_size) - 1;
+ ps->store->chunk_size = chunk_size;
+ ps->store->chunk_mask = chunk_size - 1;
+ ps->store->chunk_shift = ffs(chunk_size) - 1;
- r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+ r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
if (r)
return r;
@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
{
struct disk_header *dh;
- memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
+ memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
- dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
+ dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
static void persistent_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
- *numerator = get_info(store)->next_free * store->snap->chunk_size;
- *denominator = get_dev_size(store->snap->cow->bdev);
+ *numerator = get_info(store)->next_free * store->chunk_size;
+ *denominator = get_dev_size(store->cow->bdev);
}
-static void persistent_destroy(struct dm_exception_store *store)
+static void persistent_dtr(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
- dm_io_client_destroy(ps->io_client);
- vfree(ps->callbacks);
+
+ /* Created in read_header */
+ if (ps->io_client)
+ dm_io_client_destroy(ps->io_client);
free_area(ps);
+
+ /* Allocated in persistent_read_metadata */
+ if (ps->callbacks)
+ vfree(ps->callbacks);
+
kfree(ps);
}
@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
/*
* Now we know correct chunk_size, complete the initialisation.
*/
- ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+ ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
- sector_t size = get_dev_size(store->snap->cow->bdev);
+ sector_t size = get_dev_size(store->cow->bdev);
/* Is there enough room ? */
- if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+ if (size < ((ps->next_free + 1) * store->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
DMWARN("write header failed");
}
-int dm_create_persistent(struct dm_exception_store *store)
+static int persistent_ctr(struct dm_exception_store *store,
+ unsigned argc, char **argv)
{
struct pstore *ps;
/* allocate the pstore */
- ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+ ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
- ps->snap = store->snap;
+ ps->store = store;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
return -ENOMEM;
}
- store->destroy = persistent_destroy;
- store->read_metadata = persistent_read_metadata;
- store->prepare_exception = persistent_prepare_exception;
- store->commit_exception = persistent_commit_exception;
- store->drop_snapshot = persistent_drop_snapshot;
- store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
+static unsigned persistent_status(struct dm_exception_store *store,
+ status_type_t status, char *result,
+ unsigned maxlen)
+{
+ unsigned sz = 0;
+
+ switch (status) {
+ case STATUSTYPE_INFO:
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT(" %s P %llu", store->cow->name,
+ (unsigned long long)store->chunk_size);
+ }
+
+ return sz;
+}
+
+static struct dm_exception_store_type _persistent_type = {
+ .name = "persistent",
+ .module = THIS_MODULE,
+ .ctr = persistent_ctr,
+ .dtr = persistent_dtr,
+ .read_metadata = persistent_read_metadata,
+ .prepare_exception = persistent_prepare_exception,
+ .commit_exception = persistent_commit_exception,
+ .drop_snapshot = persistent_drop_snapshot,
+ .fraction_full = persistent_fraction_full,
+ .status = persistent_status,
+};
+
+static struct dm_exception_store_type _persistent_compat_type = {
+ .name = "P",
+ .module = THIS_MODULE,
+ .ctr = persistent_ctr,
+ .dtr = persistent_dtr,
+ .read_metadata = persistent_read_metadata,
+ .prepare_exception = persistent_prepare_exception,
+ .commit_exception = persistent_commit_exception,
+ .drop_snapshot = persistent_drop_snapshot,
+ .fraction_full = persistent_fraction_full,
+ .status = persistent_status,
+};
+
int dm_persistent_snapshot_init(void)
{
- return 0;
+ int r;
+
+ r = dm_exception_store_type_register(&_persistent_type);
+ if (r) {
+ DMERR("Unable to register persistent exception store type");
+ return r;
+ }
+
+ r = dm_exception_store_type_register(&_persistent_compat_type);
+ if (r) {
+ DMERR("Unable to register old-style persistent exception "
+ "store type");
+ dm_exception_store_type_unregister(&_persistent_type);
+ return r;
+ }
+
+ return r;
}
void dm_persistent_snapshot_exit(void)
{
+ dm_exception_store_type_unregister(&_persistent_type);
+ dm_exception_store_type_unregister(&_persistent_compat_type);
}
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 7f6e2e6dcb0..cde5aa558e6 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
@@ -23,7 +22,7 @@ struct transient_c {
sector_t next_free;
};
-static void transient_destroy(struct dm_exception_store *store)
+static void transient_dtr(struct dm_exception_store *store)
{
kfree(store->context);
}
@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
static int transient_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
- struct transient_c *tc = (struct transient_c *) store->context;
- sector_t size = get_dev_size(store->snap->cow->bdev);
+ struct transient_c *tc = store->context;
+ sector_t size = get_dev_size(store->cow->bdev);
- if (size < (tc->next_free + store->snap->chunk_size))
+ if (size < (tc->next_free + store->chunk_size))
return -1;
- e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
- tc->next_free += store->snap->chunk_size;
+ e->new_chunk = sector_to_chunk(store, tc->next_free);
+ tc->next_free += store->chunk_size;
return 0;
}
@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
- *denominator = get_dev_size(store->snap->cow->bdev);
+ *denominator = get_dev_size(store->cow->bdev);
}
-int dm_create_transient(struct dm_exception_store *store)
+static int transient_ctr(struct dm_exception_store *store,
+ unsigned argc, char **argv)
{
struct transient_c *tc;
- store->destroy = transient_destroy;
- store->read_metadata = transient_read_metadata;
- store->prepare_exception = transient_prepare_exception;
- store->commit_exception = transient_commit_exception;
- store->drop_snapshot = NULL;
- store->fraction_full = transient_fraction_full;
-
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
return 0;
}
+static unsigned transient_status(struct dm_exception_store *store,
+ status_type_t status, char *result,
+ unsigned maxlen)
+{
+ unsigned sz = 0;
+
+ switch (status) {
+ case STATUSTYPE_INFO:
+ break;
+ case STATUSTYPE_TABLE:
+ DMEMIT(" %s N %llu", store->cow->name,
+ (unsigned long long)store->chunk_size);
+ }
+
+ return sz;
+}
+
+static struct dm_exception_store_type _transient_type = {
+ .name = "transient",
+ .module = THIS_MODULE,
+ .ctr = transient_ctr,
+ .dtr = transient_dtr,
+ .read_metadata = transient_read_metadata,
+ .prepare_exception = transient_prepare_exception,
+ .commit_exception = transient_commit_exception,
+ .fraction_full = transient_fraction_full,
+ .status = transient_status,
+};
+
+static struct dm_exception_store_type _transient_compat_type = {
+ .name = "N",
+ .module = THIS_MODULE,
+ .ctr = transient_ctr,
+ .dtr = transient_dtr,
+ .read_metadata = transient_read_metadata,
+ .prepare_exception = transient_prepare_exception,
+ .commit_exception = transient_commit_exception,
+ .fraction_full = transient_fraction_full,
+ .status = transient_status,
+};
+
int dm_transient_snapshot_init(void)
{
- return 0;
+ int r;
+
+ r = dm_exception_store_type_register(&_transient_type);
+ if (r) {
+ DMWARN("Unable to register transient exception store type");
+ return r;
+ }
+
+ r = dm_exception_store_type_register(&_transient_compat_type);
+ if (r) {
+ DMWARN("Unable to register old-style transient "
+ "exception store type");
+ dm_exception_store_type_unregister(&_transient_type);
+ return r;
+ }
+
+ return r;
}
void dm_transient_snapshot_exit(void)
{
+ dm_exception_store_type_unregister(&_transient_type);
+ dm_exception_store_type_unregister(&_transient_compat_type);
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 65ff82ff124..981a0413068 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -7,7 +7,6 @@
*/
#include <linux/blkdev.h>
-#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
@@ -20,9 +19,9 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
@@ -47,9 +46,76 @@
*/
#define MIN_IOS 256
+#define DM_TRACKED_CHUNK_HASH_SIZE 16
+#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
+ (DM_TRACKED_CHUNK_HASH_SIZE - 1))
+
+struct exception_table {
+ uint32_t hash_mask;
+ unsigned hash_shift;
+ struct list_head *table;
+};
+
+struct dm_snapshot {
+ struct rw_semaphore lock;
+
+ struct dm_dev *origin;
+
+ /* List of snapshots per Origin */
+ struct list_head list;
+
+ /* You can't use a snapshot if this is 0 (e.g. if full) */
+ int valid;
+
+ /* Origin writes don't trigger exceptions until this is set */
+ int active;
+
+ mempool_t *pending_pool;
+
+ atomic_t pending_exceptions_count;
+
+ struct exception_table pending;
+ struct exception_table complete;
+
+ /*
+ * pe_lock protects all pending_exception operations and access
+ * as well as the snapshot_bios list.
+ */
+ spinlock_t pe_lock;
+
+ /* The on disk metadata handler */
+ struct dm_exception_store *store;
+
+ struct dm_kcopyd_client *kcopyd_client;
+
+ /* Queue of snapshot writes for ksnapd to flush */
+ struct bio_list queued_bios;
+ struct work_struct queued_bios_work;
+
+ /* Chunks with outstanding reads */
+ mempool_t *tracked_chunk_pool;
+ spinlock_t tracked_chunk_lock;
+ struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+};
+
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
+static sector_t chunk_to_sector(struct dm_exception_store *store,
+ chunk_t chunk)
+{
+ return chunk << store->chunk_shift;
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+ /*
+ * There is only ever one instance of a particular block
+ * device so we can compare pointers safely.
+ */
+ return lhs == rhs;
+}
+
struct dm_snap_pending_exception {
struct dm_snap_exception e;
@@ -476,11 +542,11 @@ static int init_hash_tables(struct dm_snapshot *s)
* Calculate based on the size of the original volume or
* the COW volume...
*/
- cow_dev_size = get_dev_size(s->cow->bdev);
+ cow_dev_size = get_dev_size(s->store->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
- hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+ hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
hash_size = rounddown_pow_of_two(hash_size);
@@ -505,58 +571,6 @@ static int init_hash_tables(struct dm_snapshot *s)
}
/*
- * Round a number up to the nearest 'size' boundary. size must
- * be a power of 2.
- */
-static ulong round_up(ulong n, ulong size)
-{
- size--;
- return (n + size) & ~size;
-}
-
-static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
- char **error)
-{
- unsigned long chunk_size;
- char *value;
-
- chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
- if (*chunk_size_arg == '\0' || *value != '\0') {
- *error = "Invalid chunk size";
- return -EINVAL;
- }
-
- if (!chunk_size) {
- s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
- return 0;
- }
-
- /*
- * Chunk size must be multiple of page size. Silently
- * round up if it's not.
- */
- chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
- /* Check chunk_size is a power of 2 */
- if (!is_power_of_2(chunk_size)) {
- *error = "Chunk size is not a power of 2";
- return -EINVAL;
- }
-
- /* Validate the chunk size against the device block size */
- if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
- *error = "Chunk size is not a multiple of device blocksize";
- return -EINVAL;
- }
-
- s->chunk_size = chunk_size;
- s->chunk_mask = chunk_size - 1;
- s->chunk_shift = ffs(chunk_size) - 1;
-
- return 0;
-}
-
-/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
@@ -564,91 +578,68 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_snapshot *s;
int i;
int r = -EINVAL;
- char persistent;
char *origin_path;
- char *cow_path;
+ struct dm_exception_store *store;
+ unsigned args_used;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
origin_path = argv[0];
- cow_path = argv[1];
- persistent = toupper(*argv[2]);
+ argv++;
+ argc--;
- if (persistent != 'P' && persistent != 'N') {
- ti->error = "Persistent flag is not P or N";
+ r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+ if (r) {
+ ti->error = "Couldn't create exception store";
r = -EINVAL;
- goto bad1;
+ goto bad_args;
}
+ argv += args_used;
+ argc -= args_used;
+
s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (s == NULL) {
+ if (!s) {
ti->error = "Cannot allocate snapshot context private "
"structure";
r = -ENOMEM;
- goto bad1;
+ goto bad_snap;
}
r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
- goto bad2;
- }
-
- r = dm_get_device(ti, cow_path, 0, 0,
- FMODE_READ | FMODE_WRITE, &s->cow);
- if (r) {
- dm_put_device(ti, s->origin);
- ti->error = "Cannot get COW device";
- goto bad2;
+ goto bad_origin;
}
- r = set_chunk_size(s, argv[3], &ti->error);
- if (r)
- goto bad3;
-
- s->type = persistent;
-
+ s->store = store;
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
- s->ti = ti;
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM;
- goto bad3;
- }
-
- s->store.snap = s;
-
- if (persistent == 'P')
- r = dm_create_persistent(&s->store);
- else
- r = dm_create_transient(&s->store);
-
- if (r) {
- ti->error = "Couldn't create exception store";
- r = -EINVAL;
- goto bad4;
+ goto bad_hash_tables;
}
r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
if (r) {
ti->error = "Could not create kcopyd client";
- goto bad5;
+ goto bad_kcopyd;
}
s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
if (!s->pending_pool) {
ti->error = "Could not allocate mempool for pending exceptions";
- goto bad6;
+ goto bad_pending_pool;
}
s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
@@ -665,7 +656,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
/* Metadata must only be loaded into one table at once */
- r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
+ r = s->store->type->read_metadata(s->store, dm_add_exception,
+ (void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
goto bad_load_and_register;
@@ -686,34 +678,33 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ti->private = s;
- ti->split_io = s->chunk_size;
+ ti->split_io = s->store->chunk_size;
return 0;
- bad_load_and_register:
+bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
- bad_tracked_chunk_pool:
+bad_tracked_chunk_pool:
mempool_destroy(s->pending_pool);
- bad6:
+bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
- bad5:
- s->store.destroy(&s->store);
-
- bad4:
+bad_kcopyd:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
- bad3:
- dm_put_device(ti, s->cow);
+bad_hash_tables:
dm_put_device(ti, s->origin);
- bad2:
+bad_origin:
kfree(s);
- bad1:
+bad_snap:
+ dm_exception_store_destroy(store);
+
+bad_args:
return r;
}
@@ -724,8 +715,6 @@ static void __free_exceptions(struct dm_snapshot *s)
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
-
- s->store.destroy(&s->store);
}
static void snapshot_dtr(struct dm_target *ti)
@@ -761,7 +750,8 @@ static void snapshot_dtr(struct dm_target *ti)
mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
- dm_put_device(ti, s->cow);
+
+ dm_exception_store_destroy(s->store);
kfree(s);
}
@@ -820,12 +810,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
else if (err == -ENOMEM)
DMERR("Invalidating snapshot: Unable to allocate exception.");
- if (s->store.drop_snapshot)
- s->store.drop_snapshot(&s->store);
+ if (s->store->type->drop_snapshot)
+ s->store->type->drop_snapshot(s->store);
s->valid = 0;
- dm_table_event(s->ti->table);
+ dm_table_event(s->store->ti->table);
}
static void get_pending_exception(struct dm_snap_pending_exception *pe)
@@ -943,8 +933,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
else
/* Update the metadata if we are persistent */
- s->store.commit_exception(&s->store, &pe->e, commit_callback,
- pe);
+ s->store->type->commit_exception(s->store, &pe->e,
+ commit_callback, pe);
}
/*
@@ -960,11 +950,11 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dev_size = get_dev_size(bdev);
src.bdev = bdev;
- src.sector = chunk_to_sector(s, pe->e.old_chunk);
- src.count = min(s->chunk_size, dev_size - src.sector);
+ src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
+ src.count = min(s->store->chunk_size, dev_size - src.sector);
- dest.bdev = s->cow->bdev;
- dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+ dest.bdev = s->store->cow->bdev;
+ dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
dest.count = src.count;
/* Hand over to kcopyd */
@@ -972,6 +962,17 @@ static void start_copy(struct dm_snap_pending_exception *pe)
&src, 1, &dest, 0, copy_callback, pe);
}
+static struct dm_snap_pending_exception *
+__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
+{
+ struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
+
+ if (!e)
+ return NULL;
+
+ return container_of(e, struct dm_snap_pending_exception, e);
+}
+
/*
* Looks to see if this snapshot already has a pending exception
* for this chunk, otherwise it allocates a new one and inserts
@@ -981,40 +982,15 @@ static void start_copy(struct dm_snap_pending_exception *pe)
* this.
*/
static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+__find_pending_exception(struct dm_snapshot *s,
+ struct dm_snap_pending_exception *pe, chunk_t chunk)
{
- struct dm_snap_exception *e;
- struct dm_snap_pending_exception *pe;
- chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+ struct dm_snap_pending_exception *pe2;
- /*
- * Is there a pending exception for this already ?
- */
- e = lookup_exception(&s->pending, chunk);
- if (e) {
- /* cast the exception to a pending exception */
- pe = container_of(e, struct dm_snap_pending_exception, e);
- goto out;
- }
-
- /*
- * Create a new pending exception, we don't want
- * to hold the lock while we do this.
- */
- up_write(&s->lock);
- pe = alloc_pending_exception(s);
- down_write(&s->lock);
-
- if (!s->valid) {
- free_pending_exception(pe);
- return NULL;
- }
-
- e = lookup_exception(&s->pending, chunk);
- if (e) {
+ pe2 = __lookup_pending_exception(s, chunk);
+ if (pe2) {
free_pending_exception(pe);
- pe = container_of(e, struct dm_snap_pending_exception, e);
- goto out;
+ return pe2;
}
pe->e.old_chunk = chunk;
@@ -1024,7 +1000,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
atomic_set(&pe->ref_count, 0);
pe->started = 0;
- if (s->store.prepare_exception(&s->store, &pe->e)) {
+ if (s->store->type->prepare_exception(s->store, &pe->e)) {
free_pending_exception(pe);
return NULL;
}
@@ -1032,17 +1008,18 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
get_pending_exception(pe);
insert_exception(&s->pending, &pe->e);
- out:
return pe;
}
static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
struct bio *bio, chunk_t chunk)
{
- bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
- (chunk - e->old_chunk)) +
- (bio->bi_sector & s->chunk_mask);
+ bio->bi_bdev = s->store->cow->bdev;
+ bio->bi_sector = chunk_to_sector(s->store,
+ dm_chunk_number(e->new_chunk) +
+ (chunk - e->old_chunk)) +
+ (bio->bi_sector &
+ s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
@@ -1054,7 +1031,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- chunk = sector_to_chunk(s, bio->bi_sector);
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
@@ -1083,11 +1060,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
* writeable.
*/
if (bio_rw(bio) == WRITE) {
- pe = __find_pending_exception(s, bio);
+ pe = __lookup_pending_exception(s, chunk);
if (!pe) {
- __invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
- goto out_unlock;
+ up_write(&s->lock);
+ pe = alloc_pending_exception(s);
+ down_write(&s->lock);
+
+ if (!s->valid) {
+ free_pending_exception(pe);
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ e = lookup_exception(&s->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
+ pe = __find_pending_exception(s, pe, chunk);
+ if (!pe) {
+ __invalidate_snapshot(s, -ENOMEM);
+ r = -EIO;
+ goto out_unlock;
+ }
}
remap_exception(s, &pe->e, bio, chunk);
@@ -1137,24 +1134,25 @@ static void snapshot_resume(struct dm_target *ti)
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
+ unsigned sz = 0;
struct dm_snapshot *snap = ti->private;
switch (type) {
case STATUSTYPE_INFO:
if (!snap->valid)
- snprintf(result, maxlen, "Invalid");
+ DMEMIT("Invalid");
else {
- if (snap->store.fraction_full) {
+ if (snap->store->type->fraction_full) {
sector_t numerator, denominator;
- snap->store.fraction_full(&snap->store,
- &numerator,
- &denominator);
- snprintf(result, maxlen, "%llu/%llu",
- (unsigned long long)numerator,
- (unsigned long long)denominator);
+ snap->store->type->fraction_full(snap->store,
+ &numerator,
+ &denominator);
+ DMEMIT("%llu/%llu",
+ (unsigned long long)numerator,
+ (unsigned long long)denominator);
}
else
- snprintf(result, maxlen, "Unknown");
+ DMEMIT("Unknown");
}
break;
@@ -1164,10 +1162,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
* to make private copies if the output is to
* make sense.
*/
- snprintf(result, maxlen, "%s %s %c %llu",
- snap->origin->name, snap->cow->name,
- snap->type,
- (unsigned long long)snap->chunk_size);
+ DMEMIT("%s", snap->origin->name);
+ snap->store->type->status(snap->store, type, result + sz,
+ maxlen - sz);
break;
}
@@ -1196,14 +1193,14 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+ if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, bio->bi_sector);
/*
* Check exception table to see if block
@@ -1217,10 +1214,28 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
if (e)
goto next_snapshot;
- pe = __find_pending_exception(snap, bio);
+ pe = __lookup_pending_exception(snap, chunk);
if (!pe) {
- __invalidate_snapshot(snap, -ENOMEM);
- goto next_snapshot;
+ up_write(&snap->lock);
+ pe = alloc_pending_exception(snap);
+ down_write(&snap->lock);
+
+ if (!snap->valid) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
+ e = lookup_exception(&snap->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
+ pe = __find_pending_exception(snap, pe, chunk);
+ if (!pe) {
+ __invalidate_snapshot(snap, -ENOMEM);
+ goto next_snapshot;
+ }
}
if (!primary_pe) {
@@ -1360,7 +1375,8 @@ static void origin_resume(struct dm_target *ti)
o = __lookup_origin(dev->bdev);
if (o)
list_for_each_entry (snap, &o->snapshots, list)
- chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+ chunk_size = min_not_zero(chunk_size,
+ snap->store->chunk_size);
up_read(&_origins_lock);
ti->split_io = chunk_size;
diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h
deleted file mode 100644
index d9e62b43cf8..00000000000
--- a/drivers/md/dm-snap.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_SNAPSHOT_H
-#define DM_SNAPSHOT_H
-
-#include <linux/device-mapper.h>
-#include "dm-exception-store.h"
-#include "dm-bio-list.h"
-#include <linux/blkdev.h>
-#include <linux/workqueue.h>
-
-struct exception_table {
- uint32_t hash_mask;
- unsigned hash_shift;
- struct list_head *table;
-};
-
-#define DM_TRACKED_CHUNK_HASH_SIZE 16
-#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
- (DM_TRACKED_CHUNK_HASH_SIZE - 1))
-
-struct dm_snapshot {
- struct rw_semaphore lock;
- struct dm_target *ti;
-
- struct dm_dev *origin;
- struct dm_dev *cow;
-
- /* List of snapshots per Origin */
- struct list_head list;
-
- /* Size of data blocks saved - must be a power of 2 */
- chunk_t chunk_size;
- chunk_t chunk_mask;
- chunk_t chunk_shift;
-
- /* You can't use a snapshot if this is 0 (e.g. if full) */
- int valid;
-
- /* Origin writes don't trigger exceptions until this is set */
- int active;
-
- /* Used for display of table */
- char type;
-
- mempool_t *pending_pool;
-
- atomic_t pending_exceptions_count;
-
- struct exception_table pending;
- struct exception_table complete;
-
- /*
- * pe_lock protects all pending_exception operations and access
- * as well as the snapshot_bios list.
- */
- spinlock_t pe_lock;
-
- /* The on disk metadata handler */
- struct dm_exception_store store;
-
- struct dm_kcopyd_client *kcopyd_client;
-
- /* Queue of snapshot writes for ksnapd to flush */
- struct bio_list queued_bios;
- struct work_struct queued_bios_work;
-
- /* Chunks with outstanding reads */
- mempool_t *tracked_chunk_pool;
- spinlock_t tracked_chunk_lock;
- struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
-};
-
-/*
- * Return the number of sectors in the device.
- */
-static inline sector_t get_dev_size(struct block_device *bdev)
-{
- return bdev->bd_inode->i_size >> SECTOR_SHIFT;
-}
-
-static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
-{
- return (sector & ~s->chunk_mask) >> s->chunk_shift;
-}
-
-static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
-{
- return chunk << s->chunk_shift;
-}
-
-static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
-{
- /*
- * There is only ever one instance of a particular block
- * device so we can compare pointers safely.
- */
- return lhs == rhs;
-}
-
-#endif
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2fd66c30f7f..e8361b191b9 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
}
/*
- * This upgrades the mode on an already open dm_dev. Being
+ * This upgrades the mode on an already open dm_dev, being
* careful to leave things as they were if we fail to reopen the
- * device.
+ * device and not to touch the existing bdev field in case
+ * it is accessed concurrently inside dm_table_any_congested().
*/
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md)
{
int r;
- struct dm_dev_internal dd_copy;
- dev_t dev = dd->dm_dev.bdev->bd_dev;
+ struct dm_dev_internal dd_new, dd_old;
- dd_copy = *dd;
+ dd_new = dd_old = *dd;
+
+ dd_new.dm_dev.mode |= new_mode;
+ dd_new.dm_dev.bdev = NULL;
+
+ r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
+ if (r)
+ return r;
dd->dm_dev.mode |= new_mode;
- dd->dm_dev.bdev = NULL;
- r = open_dev(dd, dev, md);
- if (!r)
- close_dev(&dd_copy, md);
- else
- *dd = dd_copy;
+ close_dev(&dd_old, md);
- return r;
+ return 0;
}
/*
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 7decf10006e..04feccf2a99 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -14,45 +14,34 @@
#define DM_MSG_PREFIX "target"
-struct tt_internal {
- struct target_type tt;
-
- struct list_head list;
- long use;
-};
-
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
#define DM_MOD_NAME_SIZE 32
-static inline struct tt_internal *__find_target_type(const char *name)
+static inline struct target_type *__find_target_type(const char *name)
{
- struct tt_internal *ti;
+ struct target_type *tt;
- list_for_each_entry (ti, &_targets, list)
- if (!strcmp(name, ti->tt.name))
- return ti;
+ list_for_each_entry(tt, &_targets, list)
+ if (!strcmp(name, tt->name))
+ return tt;
return NULL;
}
-static struct tt_internal *get_target_type(const char *name)
+static struct target_type *get_target_type(const char *name)
{
- struct tt_internal *ti;
+ struct target_type *tt;
down_read(&_lock);
- ti = __find_target_type(name);
- if (ti) {
- if ((ti->use == 0) && !try_module_get(ti->tt.module))
- ti = NULL;
- else
- ti->use++;
- }
+ tt = __find_target_type(name);
+ if (tt && !try_module_get(tt->module))
+ tt = NULL;
up_read(&_lock);
- return ti;
+ return tt;
}
static void load_module(const char *name)
@@ -62,92 +51,59 @@ static void load_module(const char *name)
struct target_type *dm_get_target_type(const char *name)
{
- struct tt_internal *ti = get_target_type(name);
+ struct target_type *tt = get_target_type(name);
- if (!ti) {
+ if (!tt) {
load_module(name);
- ti = get_target_type(name);
+ tt = get_target_type(name);
}
- return ti ? &ti->tt : NULL;
+ return tt;
}
-void dm_put_target_type(struct target_type *t)
+void dm_put_target_type(struct target_type *tt)
{
- struct tt_internal *ti = (struct tt_internal *) t;
-
down_read(&_lock);
- if (--ti->use == 0)
- module_put(ti->tt.module);
-
- BUG_ON(ti->use < 0);
+ module_put(tt->module);
up_read(&_lock);
-
- return;
-}
-
-static struct tt_internal *alloc_target(struct target_type *t)
-{
- struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
-
- if (ti)
- ti->tt = *t;
-
- return ti;
}
-
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param)
{
- struct tt_internal *ti;
+ struct target_type *tt;
down_read(&_lock);
- list_for_each_entry (ti, &_targets, list)
- iter_func(&ti->tt, param);
+ list_for_each_entry(tt, &_targets, list)
+ iter_func(tt, param);
up_read(&_lock);
return 0;
}
-int dm_register_target(struct target_type *t)
+int dm_register_target(struct target_type *tt)
{
int rv = 0;
- struct tt_internal *ti = alloc_target(t);
-
- if (!ti)
- return -ENOMEM;
down_write(&_lock);
- if (__find_target_type(t->name))
+ if (__find_target_type(tt->name))
rv = -EEXIST;
else
- list_add(&ti->list, &_targets);
+ list_add(&tt->list, &_targets);
up_write(&_lock);
- if (rv)
- kfree(ti);
return rv;
}
-void dm_unregister_target(struct target_type *t)
+void dm_unregister_target(struct target_type *tt)
{
- struct tt_internal *ti;
-
down_write(&_lock);
- if (!(ti = __find_target_type(t->name))) {
- DMCRIT("Unregistering unrecognised target: %s", t->name);
- BUG();
- }
-
- if (ti->use) {
- DMCRIT("Attempt to unregister target still in use: %s",
- t->name);
+ if (!__find_target_type(tt->name)) {
+ DMCRIT("Unregistering unrecognised target: %s", tt->name);
BUG();
}
- list_del(&ti->list);
- kfree(ti);
+ list_del(&tt->list);
up_write(&_lock);
}
@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
-static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
+static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
return 0;
}
-static void io_err_dtr(struct dm_target *ti)
+static void io_err_dtr(struct dm_target *tt)
{
/* empty */
}
-static int io_err_map(struct dm_target *ti, struct bio *bio,
+static int io_err_map(struct dm_target *tt, struct bio *bio,
union map_info *map_context)
{
return -EIO;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d40f27cce8..788ba96a625 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
/*
* Work processed by per-device workqueue.
*/
-struct dm_wq_req {
- enum {
- DM_WQ_FLUSH_DEFERRED,
- } type;
- struct work_struct work;
- struct mapped_device *md;
- void *context;
-};
-
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
- spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
@@ -129,8 +119,9 @@ struct mapped_device {
*/
atomic_t pending;
wait_queue_head_t wait;
+ struct work_struct work;
struct bio_list deferred;
- struct bio_list pushback;
+ spinlock_t deferred_lock;
/*
* Processing queue (flush/barriers)
@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
return 1;
}
+ spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
+ spin_unlock_irq(&md->deferred_lock);
up_write(&md->io_lock);
return 0; /* deferred successfully */
@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
- * This must be handled before the sleeper on
- * suspend queue merges the pushback list.
*/
- spin_lock_irqsave(&md->pushback_lock, flags);
+ spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
- bio_list_add(&md->pushback, io->bio);
+ bio_list_add(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
- spin_unlock_irqrestore(&md->pushback_lock, flags);
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
}
end_io_acct(io);
@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
}
/*
- * Split the bio into several clones.
+ * Split the bio into several clones and submit it to targets.
*/
-static int __split_bio(struct mapped_device *md, struct bio *bio)
+static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
int error = 0;
ci.map = dm_get_table(md);
- if (unlikely(!ci.map))
- return -EIO;
+ if (unlikely(!ci.map)) {
+ bio_io_error(bio);
+ return;
+ }
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
dm_table_put(ci.map);
bio_endio(bio, -EOPNOTSUPP);
- return 0;
+ return;
}
ci.md = md;
ci.bio = bio;
@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, error);
dm_table_put(ci.map);
-
- return 0;
}
/*-----------------------------------------------------------------
* CRUD END
@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
down_read(&md->io_lock);
}
- r = __split_bio(md, bio);
+ __split_and_process_bio(md, bio);
up_read(&md->io_lock);
+ return 0;
out_req:
if (r < 0)
@@ -1074,6 +1066,8 @@ out:
static struct block_device_operations dm_blk_dops;
+static void dm_wq_work(struct work_struct *work);
+
/*
* Allocate and initialise a blank device with a given minor.
*/
@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
- spin_lock_init(&md->pushback_lock);
+ spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
+ blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
+ INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
md->disk->major = _major;
@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
-static int dm_wait_for_completion(struct mapped_device *md)
+static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
+ DECLARE_WAITQUEUE(wait, current);
+
+ dm_unplug_all(md->queue);
+
+ add_wait_queue(&md->wait, &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
- if (signal_pending(current)) {
+ if (interruptible == TASK_INTERRUPTIBLE &&
+ signal_pending(current)) {
r = -EINTR;
break;
}
@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
+ remove_wait_queue(&md->wait, &wait);
+
return r;
}
/*
* Process the deferred bios
*/
-static void __flush_deferred_io(struct mapped_device *md)
+static void dm_wq_work(struct work_struct *work)
{
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ work);
struct bio *c;
- while ((c = bio_list_pop(&md->deferred))) {
- if (__split_bio(md, c))
- bio_io_error(c);
- }
-
- clear_bit(DMF_BLOCK_IO, &md->flags);
-}
+ down_write(&md->io_lock);
-static void __merge_pushback_list(struct mapped_device *md)
-{
- unsigned long flags;
+next_bio:
+ spin_lock_irq(&md->deferred_lock);
+ c = bio_list_pop(&md->deferred);
+ spin_unlock_irq(&md->deferred_lock);
- spin_lock_irqsave(&md->pushback_lock, flags);
- clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
- bio_list_merge_head(&md->deferred, &md->pushback);
- bio_list_init(&md->pushback);
- spin_unlock_irqrestore(&md->pushback_lock, flags);
-}
+ if (c) {
+ __split_and_process_bio(md, c);
+ goto next_bio;
+ }
-static void dm_wq_work(struct work_struct *work)
-{
- struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
- struct mapped_device *md = req->md;
+ clear_bit(DMF_BLOCK_IO, &md->flags);
- down_write(&md->io_lock);
- switch (req->type) {
- case DM_WQ_FLUSH_DEFERRED:
- __flush_deferred_io(md);
- break;
- default:
- DMERR("dm_wq_work: unrecognised work type %d", req->type);
- BUG();
- }
up_write(&md->io_lock);
}
-static void dm_wq_queue(struct mapped_device *md, int type, void *context,
- struct dm_wq_req *req)
-{
- req->type = type;
- req->md = md;
- req->context = context;
- INIT_WORK(&req->work, dm_wq_work);
- queue_work(md->wq, &req->work);
-}
-
-static void dm_queue_flush(struct mapped_device *md, int type, void *context)
+static void dm_queue_flush(struct mapped_device *md)
{
- struct dm_wq_req req;
-
- dm_wq_queue(md, type, context, &req);
+ queue_work(md->wq, &md->work);
flush_workqueue(md->wq);
}
@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
- DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
down_write(&md->io_lock);
set_bit(DMF_BLOCK_IO, &md->flags);
- add_wait_queue(&md->wait, &wait);
up_write(&md->io_lock);
- /* unplug */
- if (map)
- dm_table_unplug_all(map);
-
/*
* Wait for the already-mapped ios to complete.
*/
- r = dm_wait_for_completion(md);
+ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
- remove_wait_queue(&md->wait, &wait);
if (noflush)
- __merge_pushback_list(md);
+ clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
if (r < 0) {
- dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+ dm_queue_flush(md);
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
if (r)
goto out;
- dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
+ dm_queue_flush(md);
unlock_fs(md);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 20194e000c5..b48397c0abb 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
int dm_target_init(void);
void dm_target_exit(void);
struct target_type *dm_get_target_type(const char *name);
-void dm_put_target_type(struct target_type *t);
+void dm_put_target_type(struct target_type *tt);
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param);
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 86d9adf90e7..8695809b24b 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -62,7 +62,10 @@
#define ModeShift 5
#define MaxFault 50
-#include <linux/raid/md.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include <linux/seq_file.h>
static void faulty_fail(struct bio *bio, int error)
@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
return 0;
}
+static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ WARN_ONCE(raid_disks,
+ "%s does not support generic reshape\n", __func__);
+
+ if (sectors == 0)
+ return mddev->dev_sectors;
+
+ return sectors;
+}
+
static int run(mddev_t *mddev)
{
mdk_rdev_t *rdev;
@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
list_for_each_entry(rdev, &mddev->disks, same_set)
conf->rdev = rdev;
- mddev->array_sectors = mddev->size * 2;
+ md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
mddev->private = conf;
reconfig(mddev, mddev->layout, -1);
@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
.stop = stop,
.status = status,
.reconfig = reconfig,
+ .size = faulty_size,
};
static int __init raid_init(void)
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 09658b21847..7a36e38393a 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -16,7 +16,11 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/raid/linear.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "linear.h"
/*
* find which device holds a particular offset
@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
return ret;
}
+static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ linear_conf_t *conf = mddev_to_conf(mddev);
+
+ WARN_ONCE(sectors || raid_disks,
+ "%s does not support generic reshape\n", __func__);
+
+ return conf->array_sectors;
+}
+
static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
linear_conf_t *conf;
@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
- disk->num_sectors = rdev->size * 2;
- conf->array_sectors += rdev->size * 2;
+ disk->num_sectors = rdev->sectors;
+ conf->array_sectors += rdev->sectors;
cnt++;
}
@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
if (!conf)
return 1;
mddev->private = conf;
- mddev->array_sectors = conf->array_sectors;
+ md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
newconf->prev = mddev_to_conf(mddev);
mddev->private = newconf;
mddev->raid_disks++;
- mddev->array_sectors = newconf->array_sectors;
+ md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
return 0;
}
@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
.stop = linear_stop,
.status = linear_status,
.hot_add_disk = linear_add,
+ .size = linear_size,
};
static int __init linear_init (void)
diff --git a/drivers/md/linear.h b/drivers/md/linear.h
new file mode 100644
index 00000000000..bf8179587f9
--- /dev/null
+++ b/drivers/md/linear.h
@@ -0,0 +1,29 @@
+#ifndef _LINEAR_H
+#define _LINEAR_H
+
+struct dev_info {
+ mdk_rdev_t *rdev;
+ sector_t num_sectors;
+ sector_t start_sector;
+};
+
+typedef struct dev_info dev_info_t;
+
+struct linear_private_data
+{
+ struct linear_private_data *prev; /* earlier version */
+ dev_info_t **hash_table;
+ sector_t spacing;
+ sector_t array_sectors;
+ int sector_shift; /* shift before dividing
+ * by spacing
+ */
+ dev_info_t disks[0];
+};
+
+
+typedef struct linear_private_data linear_conf_t;
+
+#define mddev_to_conf(mddev) ((linear_conf_t *) mddev->private)
+
+#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a307f87eb90..ed5727c089a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,9 +33,9 @@
*/
#include <linux/kthread.h>
-#include <linux/raid/md.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
#include <linux/sysctl.h>
+#include <linux/seq_file.h>
#include <linux/buffer_head.h> /* for invalidate_bdev */
#include <linux/poll.h>
#include <linux/ctype.h>
@@ -45,11 +45,10 @@
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/delay.h>
-
-#define MAJOR_NR MD_MAJOR
-
-/* 63 partitions with the alternate major number (mdp) */
-#define MdpMinorShift 6
+#include <linux/raid/md_p.h>
+#include <linux/raid/md_u.h>
+#include "md.h"
+#include "bitmap.h"
#define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x)))
@@ -202,12 +201,68 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
)
-static int md_fail_request(struct request_queue *q, struct bio *bio)
+/* Rather than calling directly into the personality make_request function,
+ * IO requests come here first so that we can check if the device is
+ * being suspended pending a reconfiguration.
+ * We hold a refcount over the call to ->make_request. By the time that
+ * call has finished, the bio has been linked into some internal structure
+ * and so is visible to ->quiesce(), so we don't need the refcount any more.
+ */
+static int md_make_request(struct request_queue *q, struct bio *bio)
{
- bio_io_error(bio);
- return 0;
+ mddev_t *mddev = q->queuedata;
+ int rv;
+ if (mddev == NULL || mddev->pers == NULL) {
+ bio_io_error(bio);
+ return 0;
+ }
+ rcu_read_lock();
+ if (mddev->suspended) {
+ DEFINE_WAIT(__wait);
+ for (;;) {
+ prepare_to_wait(&mddev->sb_wait, &__wait,
+ TASK_UNINTERRUPTIBLE);
+ if (!mddev->suspended)
+ break;
+ rcu_read_unlock();
+ schedule();
+ rcu_read_lock();
+ }
+ finish_wait(&mddev->sb_wait, &__wait);
+ }
+ atomic_inc(&mddev->active_io);
+ rcu_read_unlock();
+ rv = mddev->pers->make_request(q, bio);
+ if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+ wake_up(&mddev->sb_wait);
+
+ return rv;
+}
+
+static void mddev_suspend(mddev_t *mddev)
+{
+ BUG_ON(mddev->suspended);
+ mddev->suspended = 1;
+ synchronize_rcu();
+ wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
+ mddev->pers->quiesce(mddev, 1);
+ md_unregister_thread(mddev->thread);
+ mddev->thread = NULL;
+ /* we now know that no code is executing in the personality module,
+ * except possibly the tail end of a ->bi_end_io function, but that
+ * is certain to complete before the module has a chance to get
+ * unloaded
+ */
+}
+
+static void mddev_resume(mddev_t *mddev)
+{
+ mddev->suspended = 0;
+ wake_up(&mddev->sb_wait);
+ mddev->pers->quiesce(mddev, 0);
}
+
static inline mddev_t *mddev_get(mddev_t *mddev)
{
atomic_inc(&mddev->active);
@@ -310,6 +365,7 @@ static mddev_t * mddev_find(dev_t unit)
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
atomic_set(&new->openers, 0);
+ atomic_set(&new->active_io, 0);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
init_waitqueue_head(&new->recovery_wait);
@@ -326,6 +382,11 @@ static inline int mddev_lock(mddev_t * mddev)
return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
+static inline int mddev_is_locked(mddev_t *mddev)
+{
+ return mutex_is_locked(&mddev->reconfig_mutex);
+}
+
static inline int mddev_trylock(mddev_t * mddev)
{
return mutex_trylock(&mddev->reconfig_mutex);
@@ -409,7 +470,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
rdev->sb_loaded = 0;
rdev->sb_page = NULL;
rdev->sb_start = 0;
- rdev->size = 0;
+ rdev->sectors = 0;
}
}
@@ -775,9 +836,9 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
else
ret = 0;
}
- rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2;
+ rdev->sectors = calc_num_sectors(rdev, sb->chunk_size);
- if (rdev->size < sb->size && sb->level > 1)
+ if (rdev->sectors < sb->size * 2 && sb->level > 1)
/* "this cannot possibly happen" ... */
ret = -EINVAL;
@@ -812,7 +873,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->clevel[0] = 0;
mddev->layout = sb->layout;
mddev->raid_disks = sb->raid_disks;
- mddev->size = sb->size;
+ mddev->dev_sectors = sb->size * 2;
mddev->events = ev1;
mddev->bitmap_offset = 0;
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -926,7 +987,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->ctime = mddev->ctime;
sb->level = mddev->level;
- sb->size = mddev->size;
+ sb->size = mddev->dev_sectors / 2;
sb->raid_disks = mddev->raid_disks;
sb->md_minor = mddev->md_minor;
sb->not_persistent = 0;
@@ -1024,7 +1085,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
static unsigned long long
super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
{
- if (num_sectors && num_sectors < rdev->mddev->size * 2)
+ if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
return 0; /* component must fit device */
if (rdev->mddev->bitmap_offset)
return 0; /* can't move bitmap */
@@ -1180,16 +1241,17 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
ret = 0;
}
if (minor_version)
- rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
+ rdev->sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+ le64_to_cpu(sb->data_offset);
else
- rdev->size = rdev->sb_start / 2;
- if (rdev->size < le64_to_cpu(sb->data_size)/2)
+ rdev->sectors = rdev->sb_start;
+ if (rdev->sectors < le64_to_cpu(sb->data_size))
return -EINVAL;
- rdev->size = le64_to_cpu(sb->data_size)/2;
+ rdev->sectors = le64_to_cpu(sb->data_size);
if (le32_to_cpu(sb->chunksize))
- rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+ rdev->sectors &= ~((sector_t)le32_to_cpu(sb->chunksize) - 1);
- if (le64_to_cpu(sb->size) > rdev->size*2)
+ if (le64_to_cpu(sb->size) > rdev->sectors)
return -EINVAL;
return ret;
}
@@ -1216,7 +1278,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->clevel[0] = 0;
mddev->layout = le32_to_cpu(sb->layout);
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
- mddev->size = le64_to_cpu(sb->size)/2;
+ mddev->dev_sectors = le64_to_cpu(sb->size);
mddev->events = ev1;
mddev->bitmap_offset = 0;
mddev->default_bitmap_offset = 1024 >> 9;
@@ -1312,7 +1374,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
sb->raid_disks = cpu_to_le32(mddev->raid_disks);
- sb->size = cpu_to_le64(mddev->size<<1);
+ sb->size = cpu_to_le64(mddev->dev_sectors);
if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
@@ -1320,10 +1382,15 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
}
if (rdev->raid_disk >= 0 &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset > 0) {
- sb->feature_map |= cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
- sb->recovery_offset = cpu_to_le64(rdev->recovery_offset);
+ !test_bit(In_sync, &rdev->flags)) {
+ if (mddev->curr_resync_completed > rdev->recovery_offset)
+ rdev->recovery_offset = mddev->curr_resync_completed;
+ if (rdev->recovery_offset > 0) {
+ sb->feature_map |=
+ cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
+ sb->recovery_offset =
+ cpu_to_le64(rdev->recovery_offset);
+ }
}
if (mddev->reshape_position != MaxSector) {
@@ -1365,7 +1432,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
{
struct mdp_superblock_1 *sb;
sector_t max_sectors;
- if (num_sectors && num_sectors < rdev->mddev->size * 2)
+ if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
return 0; /* component must fit device */
if (rdev->sb_start < rdev->data_offset) {
/* minor versions 1 and 2; superblock before data */
@@ -1381,7 +1448,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
sector_t sb_start;
sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2;
sb_start &= ~(sector_t)(4*2 - 1);
- max_sectors = rdev->size * 2 + sb_start - rdev->sb_start;
+ max_sectors = rdev->sectors + sb_start - rdev->sb_start;
if (!num_sectors || num_sectors > max_sectors)
num_sectors = max_sectors;
rdev->sb_start = sb_start;
@@ -1433,6 +1500,38 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
static LIST_HEAD(pending_raid_disks);
+static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+ struct mdk_personality *pers = mddev->pers;
+ struct gendisk *disk = mddev->gendisk;
+ struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
+ struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+
+ /* Data integrity passthrough not supported on RAID 4, 5 and 6 */
+ if (pers && pers->level >= 4 && pers->level <= 6)
+ return;
+
+ /* If rdev is integrity capable, register profile for mddev */
+ if (!bi_mddev && bi_rdev) {
+ if (blk_integrity_register(disk, bi_rdev))
+ printk(KERN_ERR "%s: %s Could not register integrity!\n",
+ __func__, disk->disk_name);
+ else
+ printk(KERN_NOTICE "Enabling data integrity on %s\n",
+ disk->disk_name);
+ return;
+ }
+
+ /* Check that mddev and rdev have matching profiles */
+ if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
+ printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
+ disk->disk_name, rdev->bdev->bd_disk->disk_name);
+ printk(KERN_NOTICE "Disabling data integrity on %s\n",
+ disk->disk_name);
+ blk_integrity_unregister(disk);
+ }
+}
+
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
char b[BDEVNAME_SIZE];
@@ -1449,8 +1548,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
if (find_rdev(mddev, rdev->bdev->bd_dev))
return -EEXIST;
- /* make sure rdev->size exceeds mddev->size */
- if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+ /* make sure rdev->sectors exceeds mddev->dev_sectors */
+ if (rdev->sectors && (mddev->dev_sectors == 0 ||
+ rdev->sectors < mddev->dev_sectors)) {
if (mddev->pers) {
/* Cannot change size, so fail
* If mddev->level <= 0, then we don't care
@@ -1459,7 +1559,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
if (mddev->level > 0)
return -ENOSPC;
} else
- mddev->size = rdev->size;
+ mddev->dev_sectors = rdev->sectors;
}
/* Verify rdev->desc_nr is unique.
@@ -1503,6 +1603,8 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
+
+ md_integrity_check(rdev, mddev);
return 0;
fail:
@@ -1713,8 +1815,8 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
static void print_rdev(mdk_rdev_t *rdev, int major_version)
{
char b[BDEVNAME_SIZE];
- printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
- bdevname(rdev->bdev,b), (unsigned long long)rdev->size,
+ printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
+ bdevname(rdev->bdev, b), (unsigned long long)rdev->sectors,
test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
rdev->desc_nr);
if (rdev->sb_loaded) {
@@ -2153,7 +2255,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
return -EINVAL;
if (rdev->mddev->pers && rdev->raid_disk >= 0)
return -EBUSY;
- if (rdev->size && rdev->mddev->external)
+ if (rdev->sectors && rdev->mddev->external)
/* Must set offset before size, so overlap checks
* can be sane */
return -EBUSY;
@@ -2167,7 +2269,7 @@ __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
static ssize_t
rdev_size_show(mdk_rdev_t *rdev, char *page)
{
- return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+ return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
}
static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
@@ -2180,34 +2282,52 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
return 1;
}
+static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
+{
+ unsigned long long blocks;
+ sector_t new;
+
+ if (strict_strtoull(buf, 10, &blocks) < 0)
+ return -EINVAL;
+
+ if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
+ return -EINVAL; /* sector conversion overflow */
+
+ new = blocks * 2;
+ if (new != blocks * 2)
+ return -EINVAL; /* unsigned long long to sector_t overflow */
+
+ *sectors = new;
+ return 0;
+}
+
static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
- unsigned long long size;
- unsigned long long oldsize = rdev->size;
mddev_t *my_mddev = rdev->mddev;
+ sector_t oldsectors = rdev->sectors;
+ sector_t sectors;
- if (strict_strtoull(buf, 10, &size) < 0)
+ if (strict_blocks_to_sectors(buf, &sectors) < 0)
return -EINVAL;
if (my_mddev->pers && rdev->raid_disk >= 0) {
if (my_mddev->persistent) {
- size = super_types[my_mddev->major_version].
- rdev_size_change(rdev, size * 2);
- if (!size)
+ sectors = super_types[my_mddev->major_version].
+ rdev_size_change(rdev, sectors);
+ if (!sectors)
return -EBUSY;
- } else if (!size) {
- size = (rdev->bdev->bd_inode->i_size >> 10);
- size -= rdev->data_offset/2;
- }
+ } else if (!sectors)
+ sectors = (rdev->bdev->bd_inode->i_size >> 9) -
+ rdev->data_offset;
}
- if (size < my_mddev->size)
+ if (sectors < my_mddev->dev_sectors)
return -EINVAL; /* component must fit device */
- rdev->size = size;
- if (size > oldsize && my_mddev->external) {
+ rdev->sectors = sectors;
+ if (sectors > oldsectors && my_mddev->external) {
/* need to check that all other rdevs with the same ->bdev
* do not overlap. We need to unlock the mddev to avoid
- * a deadlock. We have already changed rdev->size, and if
+ * a deadlock. We have already changed rdev->sectors, and if
* we have to change it back, we will have the lock again.
*/
mddev_t *mddev;
@@ -2223,9 +2343,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (test_bit(AllReserved, &rdev2->flags) ||
(rdev->bdev == rdev2->bdev &&
rdev != rdev2 &&
- overlaps(rdev->data_offset, rdev->size * 2,
+ overlaps(rdev->data_offset, rdev->sectors,
rdev2->data_offset,
- rdev2->size * 2))) {
+ rdev2->sectors))) {
overlap = 1;
break;
}
@@ -2239,11 +2359,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (overlap) {
/* Someone else could have slipped in a size
* change here, but doing so is just silly.
- * We put oldsize back because we *know* it is
+ * We put oldsectors back because we *know* it is
* safe, and trust userspace not to race with
* itself
*/
- rdev->size = oldsize;
+ rdev->sectors = oldsectors;
return -EBUSY;
}
}
@@ -2547,18 +2667,101 @@ level_show(mddev_t *mddev, char *page)
static ssize_t
level_store(mddev_t *mddev, const char *buf, size_t len)
{
+ char level[16];
ssize_t rv = len;
- if (mddev->pers)
+ struct mdk_personality *pers;
+ void *priv;
+
+ if (mddev->pers == NULL) {
+ if (len == 0)
+ return 0;
+ if (len >= sizeof(mddev->clevel))
+ return -ENOSPC;
+ strncpy(mddev->clevel, buf, len);
+ if (mddev->clevel[len-1] == '\n')
+ len--;
+ mddev->clevel[len] = 0;
+ mddev->level = LEVEL_NONE;
+ return rv;
+ }
+
+ /* request to change the personality. Need to ensure:
+ * - array is not engaged in resync/recovery/reshape
+ * - old personality can be suspended
+ * - new personality will access other array.
+ */
+
+ if (mddev->sync_thread || mddev->reshape_position != MaxSector)
return -EBUSY;
- if (len == 0)
- return 0;
- if (len >= sizeof(mddev->clevel))
- return -ENOSPC;
- strncpy(mddev->clevel, buf, len);
- if (mddev->clevel[len-1] == '\n')
+
+ if (!mddev->pers->quiesce) {
+ printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
+ mdname(mddev), mddev->pers->name);
+ return -EINVAL;
+ }
+
+ /* Now find the new personality */
+ if (len == 0 || len >= sizeof(level))
+ return -EINVAL;
+ strncpy(level, buf, len);
+ if (level[len-1] == '\n')
len--;
- mddev->clevel[len] = 0;
- mddev->level = LEVEL_NONE;
+ level[len] = 0;
+
+ request_module("md-%s", level);
+ spin_lock(&pers_lock);
+ pers = find_pers(LEVEL_NONE, level);
+ if (!pers || !try_module_get(pers->owner)) {
+ spin_unlock(&pers_lock);
+ printk(KERN_WARNING "md: personality %s not loaded\n", level);
+ return -EINVAL;
+ }
+ spin_unlock(&pers_lock);
+
+ if (pers == mddev->pers) {
+ /* Nothing to do! */
+ module_put(pers->owner);
+ return rv;
+ }
+ if (!pers->takeover) {
+ module_put(pers->owner);
+ printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
+ mdname(mddev), level);
+ return -EINVAL;
+ }
+
+ /* ->takeover must set new_* and/or delta_disks
+ * if it succeeds, and may set them when it fails.
+ */
+ priv = pers->takeover(mddev);
+ if (IS_ERR(priv)) {
+ mddev->new_level = mddev->level;
+ mddev->new_layout = mddev->layout;
+ mddev->new_chunk = mddev->chunk_size;
+ mddev->raid_disks -= mddev->delta_disks;
+ mddev->delta_disks = 0;
+ module_put(pers->owner);
+ printk(KERN_WARNING "md: %s: %s would not accept array\n",
+ mdname(mddev), level);
+ return PTR_ERR(priv);
+ }
+
+ /* Looks like we have a winner */
+ mddev_suspend(mddev);
+ mddev->pers->stop(mddev);
+ module_put(mddev->pers->owner);
+ mddev->pers = pers;
+ mddev->private = priv;
+ strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ mddev->level = mddev->new_level;
+ mddev->layout = mddev->new_layout;
+ mddev->chunk_size = mddev->new_chunk;
+ mddev->delta_disks = 0;
+ pers->run(mddev);
+ mddev_resume(mddev);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
return rv;
}
@@ -2586,12 +2789,18 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
- if (mddev->pers)
- return -EBUSY;
- if (mddev->reshape_position != MaxSector)
+ if (mddev->pers) {
+ int err;
+ if (mddev->pers->reconfig == NULL)
+ return -EBUSY;
+ err = mddev->pers->reconfig(mddev, n, -1);
+ if (err)
+ return err;
+ } else {
mddev->new_layout = n;
- else
- mddev->layout = n;
+ if (mddev->reshape_position == MaxSector)
+ mddev->layout = n;
+ }
return len;
}
static struct md_sysfs_entry md_layout =
@@ -2648,19 +2857,24 @@ chunk_size_show(mddev_t *mddev, char *page)
static ssize_t
chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
{
- /* can only set chunk_size if array is not yet active */
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
- if (mddev->pers)
- return -EBUSY;
- else if (mddev->reshape_position != MaxSector)
+ if (mddev->pers) {
+ int err;
+ if (mddev->pers->reconfig == NULL)
+ return -EBUSY;
+ err = mddev->pers->reconfig(mddev, -1, n);
+ if (err)
+ return err;
+ } else {
mddev->new_chunk = n;
- else
- mddev->chunk_size = n;
+ if (mddev->reshape_position == MaxSector)
+ mddev->chunk_size = n;
+ }
return len;
}
static struct md_sysfs_entry md_chunk_size =
@@ -2669,6 +2883,8 @@ __ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
static ssize_t
resync_start_show(mddev_t *mddev, char *page)
{
+ if (mddev->recovery_cp == MaxSector)
+ return sprintf(page, "none\n");
return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
}
@@ -2766,7 +2982,7 @@ array_state_show(mddev_t *mddev, char *page)
else {
if (list_empty(&mddev->disks) &&
mddev->raid_disks == 0 &&
- mddev->size == 0)
+ mddev->dev_sectors == 0)
st = clear;
else
st = inactive;
@@ -2973,7 +3189,8 @@ __ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
static ssize_t
size_show(mddev_t *mddev, char *page)
{
- return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->dev_sectors / 2);
}
static int update_size(mddev_t *mddev, sector_t num_sectors);
@@ -2985,20 +3202,18 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
* not increase it (except from 0).
* If array is active, we can try an on-line resize
*/
- char *e;
- int err = 0;
- unsigned long long size = simple_strtoull(buf, &e, 10);
- if (!*buf || *buf == '\n' ||
- (*e && *e != '\n'))
- return -EINVAL;
+ sector_t sectors;
+ int err = strict_blocks_to_sectors(buf, &sectors);
+ if (err < 0)
+ return err;
if (mddev->pers) {
- err = update_size(mddev, size * 2);
+ err = update_size(mddev, sectors);
md_update_sb(mddev, 1);
} else {
- if (mddev->size == 0 ||
- mddev->size > size)
- mddev->size = size;
+ if (mddev->dev_sectors == 0 ||
+ mddev->dev_sectors > sectors)
+ mddev->dev_sectors = sectors;
else
err = -ENOSPC;
}
@@ -3251,6 +3466,8 @@ static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
+ if (mddev->curr_resync == 0)
+ return sprintf(page, "none\n");
resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active);
dt = (jiffies - mddev->resync_mark) / HZ;
if (!dt) dt++;
@@ -3263,15 +3480,15 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
static ssize_t
sync_completed_show(mddev_t *mddev, char *page)
{
- unsigned long max_blocks, resync;
+ unsigned long max_sectors, resync;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
- max_blocks = mddev->resync_max_sectors;
+ max_sectors = mddev->resync_max_sectors;
else
- max_blocks = mddev->size << 1;
+ max_sectors = mddev->dev_sectors;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
- return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+ return sprintf(page, "%lu / %lu\n", resync, max_sectors);
}
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
@@ -3431,6 +3648,57 @@ static struct md_sysfs_entry md_reshape_position =
__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
reshape_position_store);
+static ssize_t
+array_size_show(mddev_t *mddev, char *page)
+{
+ if (mddev->external_size)
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->array_sectors/2);
+ else
+ return sprintf(page, "default\n");
+}
+
+static ssize_t
+array_size_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ sector_t sectors;
+
+ if (strncmp(buf, "default", 7) == 0) {
+ if (mddev->pers)
+ sectors = mddev->pers->size(mddev, 0, 0);
+ else
+ sectors = mddev->array_sectors;
+
+ mddev->external_size = 0;
+ } else {
+ if (strict_blocks_to_sectors(buf, &sectors) < 0)
+ return -EINVAL;
+ if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
+ return -EINVAL;
+
+ mddev->external_size = 1;
+ }
+
+ mddev->array_sectors = sectors;
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ if (mddev->pers) {
+ struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
+
+ if (bdev) {
+ mutex_lock(&bdev->bd_inode->i_mutex);
+ i_size_write(bdev->bd_inode,
+ (loff_t)mddev->array_sectors << 9);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
+ bdput(bdev);
+ }
+ }
+
+ return len;
+}
+
+static struct md_sysfs_entry md_array_size =
+__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
+ array_size_store);
static struct attribute *md_default_attrs[] = {
&md_level.attr,
@@ -3444,6 +3712,7 @@ static struct attribute *md_default_attrs[] = {
&md_safe_delay.attr,
&md_array_state.attr,
&md_reshape_position.attr,
+ &md_array_size.attr,
NULL,
};
@@ -3602,10 +3871,12 @@ static int md_alloc(dev_t dev, char *name)
mddev_put(mddev);
return -ENOMEM;
}
+ mddev->queue->queuedata = mddev;
+
/* Can be unlocked because the queue is new: no concurrency */
queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
- blk_queue_make_request(mddev->queue, md_fail_request);
+ blk_queue_make_request(mddev->queue, md_make_request);
disk = alloc_disk(1 << shift);
if (!disk) {
@@ -3731,13 +4002,13 @@ static int do_md_run(mddev_t * mddev)
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (test_bit(Faulty, &rdev->flags))
continue;
- if (rdev->size < chunk_size / 1024) {
+ if (rdev->sectors < chunk_size / 512) {
printk(KERN_WARNING
"md: Dev %s smaller than chunk_size:"
- " %lluk < %dk\n",
+ " %llu < %d\n",
bdevname(rdev->bdev,b),
- (unsigned long long)rdev->size,
- chunk_size / 1024);
+ (unsigned long long)rdev->sectors,
+ chunk_size / 512);
return -EINVAL;
}
}
@@ -3761,11 +4032,11 @@ static int do_md_run(mddev_t * mddev)
/* perform some consistency tests on the device.
* We don't want the data to overlap the metadata,
- * Internal Bitmap issues has handled elsewhere.
+ * Internal Bitmap issues have been handled elsewhere.
*/
if (rdev->data_offset < rdev->sb_start) {
- if (mddev->size &&
- rdev->data_offset + mddev->size*2
+ if (mddev->dev_sectors &&
+ rdev->data_offset + mddev->dev_sectors
> rdev->sb_start) {
printk("md: %s: data overlaps metadata\n",
mdname(mddev));
@@ -3801,9 +4072,16 @@ static int do_md_run(mddev_t * mddev)
}
mddev->pers = pers;
spin_unlock(&pers_lock);
- mddev->level = pers->level;
+ if (mddev->level != pers->level) {
+ mddev->level = pers->level;
+ mddev->new_level = pers->level;
+ }
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ if (pers->level >= 4 && pers->level <= 6)
+ /* Cannot support integrity (yet) */
+ blk_integrity_unregister(mddev->gendisk);
+
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
@@ -3843,7 +4121,9 @@ static int do_md_run(mddev_t * mddev)
}
mddev->recovery = 0;
- mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+ /* may be over-ridden by personality */
+ mddev->resync_max_sectors = mddev->dev_sectors;
+
mddev->barriers_work = 1;
mddev->ok_start_degraded = start_dirty_degraded;
@@ -3853,7 +4133,17 @@ static int do_md_run(mddev_t * mddev)
err = mddev->pers->run(mddev);
if (err)
printk(KERN_ERR "md: pers->run() failed ...\n");
- else if (mddev->pers->sync_request) {
+ else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
+ WARN_ONCE(!mddev->external_size, "%s: default size too small,"
+ " but 'external_size' not in effect?\n", __func__);
+ printk(KERN_ERR
+ "md: invalid array_size %llu > default size %llu\n",
+ (unsigned long long)mddev->array_sectors / 2,
+ (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
+ err = -EINVAL;
+ mddev->pers->stop(mddev);
+ }
+ if (err == 0 && mddev->pers->sync_request) {
err = bitmap_create(mddev);
if (err) {
printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
@@ -3899,16 +4189,6 @@ static int do_md_run(mddev_t * mddev)
set_capacity(disk, mddev->array_sectors);
- /* If we call blk_queue_make_request here, it will
- * re-initialise max_sectors etc which may have been
- * refined inside -> run. So just set the bits we need to set.
- * Most initialisation happended when we called
- * blk_queue_make_request(..., md_fail_request)
- * earlier.
- */
- mddev->queue->queuedata = mddev;
- mddev->queue->make_request_fn = mddev->pers->make_request;
-
/* If there is a partially-recovered drive we need to
* start recovery here. If we leave it to md_check_recovery,
* it will remove the drives and not do the right thing
@@ -4038,7 +4318,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
md_super_wait(mddev);
if (mddev->ro)
set_disk_ro(disk, 0);
- blk_queue_make_request(mddev->queue, md_fail_request);
+
mddev->pers->stop(mddev);
mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL;
@@ -4095,7 +4375,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
export_array(mddev);
mddev->array_sectors = 0;
- mddev->size = 0;
+ mddev->external_size = 0;
+ mddev->dev_sectors = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
mddev->resync_min = 0;
@@ -4135,6 +4416,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev));
err = 0;
+ blk_integrity_unregister(disk);
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
out:
@@ -4300,8 +4582,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
info.patch_version = MD_PATCHLEVEL_VERSION;
info.ctime = mddev->ctime;
info.level = mddev->level;
- info.size = mddev->size;
- if (info.size != mddev->size) /* overflow */
+ info.size = mddev->dev_sectors / 2;
+ if (info.size != mddev->dev_sectors / 2) /* overflow */
info.size = -1;
info.nr_disks = nr;
info.raid_disks = mddev->raid_disks;
@@ -4480,6 +4762,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
clear_bit(In_sync, &rdev->flags); /* just to be sure */
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
+ else
+ clear_bit(WriteMostly, &rdev->flags);
rdev->raid_disk = -1;
err = bind_rdev_to_array(rdev, mddev);
@@ -4543,7 +4827,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
} else
rdev->sb_start = calc_dev_sboffset(rdev->bdev);
- rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+ rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
err = bind_rdev_to_array(rdev, mddev);
if (err) {
@@ -4613,7 +4897,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
else
rdev->sb_start = rdev->bdev->bd_inode->i_size / 512;
- rdev->size = calc_num_sectors(rdev, mddev->chunk_size) / 2;
+ rdev->sectors = calc_num_sectors(rdev, mddev->chunk_size);
if (test_bit(Faulty, &rdev->flags)) {
printk(KERN_WARNING
@@ -4749,7 +5033,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
mddev->level = info->level;
mddev->clevel[0] = 0;
- mddev->size = info->size;
+ mddev->dev_sectors = 2 * (sector_t)info->size;
mddev->raid_disks = info->raid_disks;
/* don't set md_minor, it is determined by which /dev/md* was
* openned
@@ -4788,6 +5072,17 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
return 0;
}
+void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
+{
+ WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
+
+ if (mddev->external_size)
+ return;
+
+ mddev->array_sectors = array_sectors;
+}
+EXPORT_SYMBOL(md_set_array_sectors);
+
static int update_size(mddev_t *mddev, sector_t num_sectors)
{
mdk_rdev_t *rdev;
@@ -4814,8 +5109,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
*/
return -EBUSY;
list_for_each_entry(rdev, &mddev->disks, same_set) {
- sector_t avail;
- avail = rdev->size * 2;
+ sector_t avail = rdev->sectors;
if (fit && (num_sectors == 0 || num_sectors > avail))
num_sectors = avail;
@@ -4887,12 +5181,18 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
)
return -EINVAL;
/* Check there is only one change */
- if (info->size >= 0 && mddev->size != info->size) cnt++;
- if (mddev->raid_disks != info->raid_disks) cnt++;
- if (mddev->layout != info->layout) cnt++;
- if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) cnt++;
- if (cnt == 0) return 0;
- if (cnt > 1) return -EINVAL;
+ if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
+ cnt++;
+ if (mddev->raid_disks != info->raid_disks)
+ cnt++;
+ if (mddev->layout != info->layout)
+ cnt++;
+ if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT))
+ cnt++;
+ if (cnt == 0)
+ return 0;
+ if (cnt > 1)
+ return -EINVAL;
if (mddev->layout != info->layout) {
/* Change layout
@@ -4904,7 +5204,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
else
return mddev->pers->reconfig(mddev, info->layout, -1);
}
- if (info->size >= 0 && mddev->size != info->size)
+ if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
rv = update_size(mddev, (sector_t)info->size * 2);
if (mddev->raid_disks != info->raid_disks)
@@ -5331,6 +5631,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
void md_unregister_thread(mdk_thread_t *thread)
{
+ if (!thread)
+ return;
dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
kthread_stop(thread->tsk);
@@ -5404,7 +5706,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_blocks = mddev->resync_max_sectors >> 1;
else
- max_blocks = mddev->size;
+ max_blocks = mddev->dev_sectors / 2;
/*
* Should not happen.
@@ -5537,7 +5839,7 @@ struct mdstat_info {
static int md_seq_show(struct seq_file *seq, void *v)
{
mddev_t *mddev = v;
- sector_t size;
+ sector_t sectors;
mdk_rdev_t *rdev;
struct mdstat_info *mi = seq->private;
struct bitmap *bitmap;
@@ -5573,7 +5875,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " %s", mddev->pers->name);
}
- size = 0;
+ sectors = 0;
list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
@@ -5585,7 +5887,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
continue;
} else if (rdev->raid_disk < 0)
seq_printf(seq, "(S)"); /* spare */
- size += rdev->size;
+ sectors += rdev->sectors;
}
if (!list_empty(&mddev->disks)) {
@@ -5595,7 +5897,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
mddev->array_sectors / 2);
else
seq_printf(seq, "\n %llu blocks",
- (unsigned long long)size);
+ (unsigned long long)sectors / 2);
}
if (mddev->persistent) {
if (mddev->major_version != 0 ||
@@ -5722,19 +6024,19 @@ int unregister_md_personality(struct mdk_personality *p)
return 0;
}
-static int is_mddev_idle(mddev_t *mddev)
+static int is_mddev_idle(mddev_t *mddev, int init)
{
mdk_rdev_t * rdev;
int idle;
- long curr_events;
+ int curr_events;
idle = 1;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
- curr_events = part_stat_read(&disk->part0, sectors[0]) +
- part_stat_read(&disk->part0, sectors[1]) -
- atomic_read(&disk->sync_io);
+ curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
+ (int)part_stat_read(&disk->part0, sectors[1]) -
+ atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
* disk_stats is counted when it completes.
@@ -5757,7 +6059,7 @@ static int is_mddev_idle(mddev_t *mddev)
* always make curr_events less than last_events.
*
*/
- if (curr_events - rdev->last_events > 4096) {
+ if (init || curr_events - rdev->last_events > 64) {
rdev->last_events = curr_events;
idle = 0;
}
@@ -5980,10 +6282,10 @@ void md_do_sync(mddev_t *mddev)
j = mddev->recovery_cp;
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- max_sectors = mddev->size << 1;
+ max_sectors = mddev->dev_sectors;
else {
/* recovery follows the physical size of devices */
- max_sectors = mddev->size << 1;
+ max_sectors = mddev->dev_sectors;
j = MaxSector;
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
@@ -6000,7 +6302,7 @@ void md_do_sync(mddev_t *mddev)
"(but not more than %d KB/sec) for %s.\n",
speed_max(mddev), desc);
- is_mddev_idle(mddev); /* this also initializes IO event counters */
+ is_mddev_idle(mddev, 1); /* this initializes IO event counters */
io_sectors = 0;
for (m = 0; m < SYNC_MARKS; m++) {
@@ -6040,6 +6342,18 @@ void md_do_sync(mddev_t *mddev)
}
if (kthread_should_stop())
goto interrupted;
+
+ if (mddev->curr_resync > mddev->curr_resync_completed &&
+ (mddev->curr_resync - mddev->curr_resync_completed)
+ > (max_sectors >> 4)) {
+ /* time to update curr_resync_completed */
+ blk_unplug(mddev->queue);
+ wait_event(mddev->recovery_wait,
+ atomic_read(&mddev->recovery_active) == 0);
+ mddev->curr_resync_completed =
+ mddev->curr_resync;
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ }
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
@@ -6102,7 +6416,7 @@ void md_do_sync(mddev_t *mddev)
if (currspeed > speed_min(mddev)) {
if ((currspeed > speed_max(mddev)) ||
- !is_mddev_idle(mddev)) {
+ !is_mddev_idle(mddev, 0)) {
msleep(500);
goto repeat;
}
@@ -6173,6 +6487,8 @@ static int remove_and_add_spares(mddev_t *mddev)
mdk_rdev_t *rdev;
int spares = 0;
+ mddev->curr_resync_completed = 0;
+
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
@@ -6327,6 +6643,9 @@ void md_check_recovery(mddev_t *mddev)
sysfs_notify(&mddev->kobj, NULL,
"degraded");
}
+ if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ mddev->pers->finish_reshape)
+ mddev->pers->finish_reshape(mddev);
md_update_sb(mddev, 1);
/* if array is no-longer degraded, then any saved_raid_disk
@@ -6470,13 +6789,13 @@ static void md_geninit(void)
static int __init md_init(void)
{
- if (register_blkdev(MAJOR_NR, "md"))
+ if (register_blkdev(MD_MAJOR, "md"))
return -1;
if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
- unregister_blkdev(MAJOR_NR, "md");
+ unregister_blkdev(MD_MAJOR, "md");
return -1;
}
- blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE,
+ blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
md_probe, NULL, NULL);
blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
md_probe, NULL, NULL);
@@ -6562,10 +6881,10 @@ static __exit void md_exit(void)
mddev_t *mddev;
struct list_head *tmp;
- blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS);
+ blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
- unregister_blkdev(MAJOR_NR,"md");
+ unregister_blkdev(MD_MAJOR,"md");
unregister_blkdev(mdp_major, "mdp");
unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header);
diff --git a/drivers/md/md.h b/drivers/md/md.h
new file mode 100644
index 00000000000..e9b7f54c24d
--- /dev/null
+++ b/drivers/md/md.h
@@ -0,0 +1,436 @@
+/*
+ md_k.h : kernel internal structure of the Linux MD driver
+ Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ You should have received a copy of the GNU General Public License
+ (for example /usr/src/linux/COPYING); if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifndef _MD_K_H
+#define _MD_K_H
+
+#ifdef CONFIG_BLOCK
+
+#define MaxSector (~(sector_t)0)
+
+typedef struct mddev_s mddev_t;
+typedef struct mdk_rdev_s mdk_rdev_t;
+
+/*
+ * options passed in raidrun:
+ */
+
+/* Currently this must fit in an 'int' */
+#define MAX_CHUNK_SIZE (1<<30)
+
+/*
+ * MD's 'extended' device
+ */
+struct mdk_rdev_s
+{
+ struct list_head same_set; /* RAID devices within the same set */
+
+ sector_t sectors; /* Device size (in 512bytes sectors) */
+ mddev_t *mddev; /* RAID array if running */
+ int last_events; /* IO event timestamp */
+
+ struct block_device *bdev; /* block device handle */
+
+ struct page *sb_page;
+ int sb_loaded;
+ __u64 sb_events;
+ sector_t data_offset; /* start of data in array */
+ sector_t sb_start; /* offset of the super block (in 512byte sectors) */
+ int sb_size; /* bytes in the superblock */
+ int preferred_minor; /* autorun support */
+
+ struct kobject kobj;
+
+ /* A device can be in one of three states based on two flags:
+ * Not working: faulty==1 in_sync==0
+ * Fully working: faulty==0 in_sync==1
+ * Working, but not
+ * in sync with array
+ * faulty==0 in_sync==0
+ *
+ * It can never have faulty==1, in_sync==1
+ * This reduces the burden of testing multiple flags in many cases
+ */
+
+ unsigned long flags;
+#define Faulty 1 /* device is known to have a fault */
+#define In_sync 2 /* device is in_sync with rest of array */
+#define WriteMostly 4 /* Avoid reading if at all possible */
+#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */
+#define AllReserved 6 /* If whole device is reserved for
+ * one array */
+#define AutoDetected 7 /* added by auto-detect */
+#define Blocked 8 /* An error occured on an externally
+ * managed array, don't allow writes
+ * until it is cleared */
+#define StateChanged 9 /* Faulty or Blocked has changed during
+ * interrupt, so it needs to be
+ * notified by the thread */
+ wait_queue_head_t blocked_wait;
+
+ int desc_nr; /* descriptor index in the superblock */
+ int raid_disk; /* role of device in array */
+ int saved_raid_disk; /* role that device used to have in the
+ * array and could again if we did a partial
+ * resync from the bitmap
+ */
+ sector_t recovery_offset;/* If this device has been partially
+ * recovered, this is where we were
+ * up to.
+ */
+
+ atomic_t nr_pending; /* number of pending requests.
+ * only maintained for arrays that
+ * support hot removal
+ */
+ atomic_t read_errors; /* number of consecutive read errors that
+ * we have tried to ignore.
+ */
+ atomic_t corrected_errors; /* number of corrected read errors,
+ * for reporting to userspace and storing
+ * in superblock.
+ */
+ struct work_struct del_work; /* used for delayed sysfs removal */
+
+ struct sysfs_dirent *sysfs_state; /* handle for 'state'
+ * sysfs entry */
+};
+
+struct mddev_s
+{
+ void *private;
+ struct mdk_personality *pers;
+ dev_t unit;
+ int md_minor;
+ struct list_head disks;
+ unsigned long flags;
+#define MD_CHANGE_DEVS 0 /* Some device status has changed */
+#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
+#define MD_CHANGE_PENDING 2 /* superblock update in progress */
+
+ int suspended;
+ atomic_t active_io;
+ int ro;
+
+ struct gendisk *gendisk;
+
+ struct kobject kobj;
+ int hold_active;
+#define UNTIL_IOCTL 1
+#define UNTIL_STOP 2
+
+ /* Superblock information */
+ int major_version,
+ minor_version,
+ patch_version;
+ int persistent;
+ int external; /* metadata is
+ * managed externally */
+ char metadata_type[17]; /* externally set*/
+ int chunk_size;
+ time_t ctime, utime;
+ int level, layout;
+ char clevel[16];
+ int raid_disks;
+ int max_disks;
+ sector_t dev_sectors; /* used size of
+ * component devices */
+ sector_t array_sectors; /* exported array size */
+ int external_size; /* size managed
+ * externally */
+ __u64 events;
+
+ char uuid[16];
+
+ /* If the array is being reshaped, we need to record the
+ * new shape and an indication of where we are up to.
+ * This is written to the superblock.
+ * If reshape_position is MaxSector, then no reshape is happening (yet).
+ */
+ sector_t reshape_position;
+ int delta_disks, new_level, new_layout, new_chunk;
+
+ struct mdk_thread_s *thread; /* management thread */
+ struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
+ sector_t curr_resync; /* last block scheduled */
+ /* As resync requests can complete out of order, we cannot easily track
+ * how much resync has been completed. So we occasionally pause until
+ * everything completes, then set curr_resync_completed to curr_resync.
+ * As such it may be well behind the real resync mark, but it is a value
+ * we are certain of.
+ */
+ sector_t curr_resync_completed;
+ unsigned long resync_mark; /* a recent timestamp */
+ sector_t resync_mark_cnt;/* blocks written at resync_mark */
+ sector_t curr_mark_cnt; /* blocks scheduled now */
+
+ sector_t resync_max_sectors; /* may be set by personality */
+
+ sector_t resync_mismatches; /* count of sectors where
+ * parity/replica mismatch found
+ */
+
+ /* allow user-space to request suspension of IO to regions of the array */
+ sector_t suspend_lo;
+ sector_t suspend_hi;
+ /* if zero, use the system-wide default */
+ int sync_speed_min;
+ int sync_speed_max;
+
+ /* resync even though the same disks are shared among md-devices */
+ int parallel_resync;
+
+ int ok_start_degraded;
+ /* recovery/resync flags
+ * NEEDED: we might need to start a resync/recover
+ * RUNNING: a thread is running, or about to be started
+ * SYNC: actually doing a resync, not a recovery
+ * RECOVER: doing recovery, or need to try it.
+ * INTR: resync needs to be aborted for some reason
+ * DONE: thread is done and is waiting to be reaped
+ * REQUEST: user-space has requested a sync (used with SYNC)
+ * CHECK: user-space request for for check-only, no repair
+ * RESHAPE: A reshape is happening
+ *
+ * If neither SYNC or RESHAPE are set, then it is a recovery.
+ */
+#define MD_RECOVERY_RUNNING 0
+#define MD_RECOVERY_SYNC 1
+#define MD_RECOVERY_RECOVER 2
+#define MD_RECOVERY_INTR 3
+#define MD_RECOVERY_DONE 4
+#define MD_RECOVERY_NEEDED 5
+#define MD_RECOVERY_REQUESTED 6
+#define MD_RECOVERY_CHECK 7
+#define MD_RECOVERY_RESHAPE 8
+#define MD_RECOVERY_FROZEN 9
+
+ unsigned long recovery;
+ int recovery_disabled; /* if we detect that recovery
+ * will always fail, set this
+ * so we don't loop trying */
+
+ int in_sync; /* know to not need resync */
+ struct mutex reconfig_mutex;
+ atomic_t active; /* general refcount */
+ atomic_t openers; /* number of active opens */
+
+ int changed; /* true if we might need to reread partition info */
+ int degraded; /* whether md should consider
+ * adding a spare
+ */
+ int barriers_work; /* initialised to true, cleared as soon
+ * as a barrier request to slave
+ * fails. Only supported
+ */
+ struct bio *biolist; /* bios that need to be retried
+ * because BIO_RW_BARRIER is not supported
+ */
+
+ atomic_t recovery_active; /* blocks scheduled, but not written */
+ wait_queue_head_t recovery_wait;
+ sector_t recovery_cp;
+ sector_t resync_min; /* user requested sync
+ * starts here */
+ sector_t resync_max; /* resync should pause
+ * when it gets here */
+
+ struct sysfs_dirent *sysfs_state; /* handle for 'array_state'
+ * file in sysfs.
+ */
+ struct sysfs_dirent *sysfs_action; /* handle for 'sync_action' */
+
+ struct work_struct del_work; /* used for delayed sysfs removal */
+
+ spinlock_t write_lock;
+ wait_queue_head_t sb_wait; /* for waiting on superblock updates */
+ atomic_t pending_writes; /* number of active superblock writes */
+
+ unsigned int safemode; /* if set, update "clean" superblock
+ * when no writes pending.
+ */
+ unsigned int safemode_delay;
+ struct timer_list safemode_timer;
+ atomic_t writes_pending;
+ struct request_queue *queue; /* for plugging ... */
+
+ atomic_t write_behind; /* outstanding async IO */
+ unsigned int max_write_behind; /* 0 = sync */
+
+ struct bitmap *bitmap; /* the bitmap for the device */
+ struct file *bitmap_file; /* the bitmap file */
+ long bitmap_offset; /* offset from superblock of
+ * start of bitmap. May be
+ * negative, but not '0'
+ */
+ long default_bitmap_offset; /* this is the offset to use when
+ * hot-adding a bitmap. It should
+ * eventually be settable by sysfs.
+ */
+
+ struct list_head all_mddevs;
+};
+
+
+static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+ int faulty = test_bit(Faulty, &rdev->flags);
+ if (atomic_dec_and_test(&rdev->nr_pending) && faulty)
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+}
+
+static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
+{
+ atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
+}
+
+struct mdk_personality
+{
+ char *name;
+ int level;
+ struct list_head list;
+ struct module *owner;
+ int (*make_request)(struct request_queue *q, struct bio *bio);
+ int (*run)(mddev_t *mddev);
+ int (*stop)(mddev_t *mddev);
+ void (*status)(struct seq_file *seq, mddev_t *mddev);
+ /* error_handler must set ->faulty and clear ->in_sync
+ * if appropriate, and should abort recovery if needed
+ */
+ void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
+ int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
+ int (*hot_remove_disk) (mddev_t *mddev, int number);
+ int (*spare_active) (mddev_t *mddev);
+ sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
+ int (*resize) (mddev_t *mddev, sector_t sectors);
+ sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
+ int (*check_reshape) (mddev_t *mddev);
+ int (*start_reshape) (mddev_t *mddev);
+ void (*finish_reshape) (mddev_t *mddev);
+ int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
+ /* quiesce moves between quiescence states
+ * 0 - fully active
+ * 1 - no new requests allowed
+ * others - reserved
+ */
+ void (*quiesce) (mddev_t *mddev, int state);
+ /* takeover is used to transition an array from one
+ * personality to another. The new personality must be able
+ * to handle the data in the current layout.
+ * e.g. 2drive raid1 -> 2drive raid5
+ * ndrive raid5 -> degraded n+1drive raid6 with special layout
+ * If the takeover succeeds, a new 'private' structure is returned.
+ * This needs to be installed and then ->run used to activate the
+ * array.
+ */
+ void *(*takeover) (mddev_t *mddev);
+};
+
+
+struct md_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(mddev_t *, char *);
+ ssize_t (*store)(mddev_t *, const char *, size_t);
+};
+
+
+static inline char * mdname (mddev_t * mddev)
+{
+ return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
+}
+
+/*
+ * iterates through some rdev ringlist. It's safe to remove the
+ * current 'rdev'. Dont touch 'tmp' though.
+ */
+#define rdev_for_each_list(rdev, tmp, head) \
+ list_for_each_entry_safe(rdev, tmp, head, same_set)
+
+/*
+ * iterates through the 'same array disks' ringlist
+ */
+#define rdev_for_each(rdev, tmp, mddev) \
+ list_for_each_entry_safe(rdev, tmp, &((mddev)->disks), same_set)
+
+#define rdev_for_each_rcu(rdev, mddev) \
+ list_for_each_entry_rcu(rdev, &((mddev)->disks), same_set)
+
+typedef struct mdk_thread_s {
+ void (*run) (mddev_t *mddev);
+ mddev_t *mddev;
+ wait_queue_head_t wqueue;
+ unsigned long flags;
+ struct task_struct *tsk;
+ unsigned long timeout;
+} mdk_thread_t;
+
+#define THREAD_WAKEUP 0
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd) \
+do { \
+ wait_queue_t __wait; \
+ init_waitqueue_entry(&__wait, current); \
+ \
+ add_wait_queue(&wq, &__wait); \
+ for (;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ spin_unlock_irq(&lock); \
+ cmd; \
+ schedule(); \
+ spin_lock_irq(&lock); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&wq, &__wait); \
+} while (0)
+
+#define wait_event_lock_irq(wq, condition, lock, cmd) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event_lock_irq(wq, condition, lock, cmd); \
+} while (0)
+
+static inline void safe_put_page(struct page *p)
+{
+ if (p) put_page(p);
+}
+
+#endif /* CONFIG_BLOCK */
+#endif
+
+
+extern int register_md_personality(struct mdk_personality *p);
+extern int unregister_md_personality(struct mdk_personality *p);
+extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
+ mddev_t *mddev, const char *name);
+extern void md_unregister_thread(mdk_thread_t *thread);
+extern void md_wakeup_thread(mdk_thread_t *thread);
+extern void md_check_recovery(mddev_t *mddev);
+extern void md_write_start(mddev_t *mddev, struct bio *bi);
+extern void md_write_end(mddev_t *mddev);
+extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
+extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
+
+extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+ sector_t sector, int size, struct page *page);
+extern void md_super_wait(mddev_t *mddev);
+extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
+ struct page *page, int rw);
+extern void md_do_sync(mddev_t *mddev);
+extern void md_new_event(mddev_t *mddev);
+extern int md_allow_write(mddev_t *mddev);
+extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
+extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
diff --git a/drivers/md/mktables.c b/drivers/md/mktables.c
index b61d5767aae..3b1500843bb 100644
--- a/drivers/md/mktables.c
+++ b/drivers/md/mktables.c
@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
uint8_t v;
uint8_t exptbl[256], invtbl[256];
- printf("#include \"raid6.h\"\n");
+ printf("#include <linux/raid/pq.h>\n");
/* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -76,6 +76,9 @@ int main(int argc, char *argv[])
printf("\t},\n");
}
printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfmul);\n");
+ printf("#endif\n");
/* Compute power-of-2 table (exponent) */
v = 1;
@@ -92,6 +95,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfexp);\n");
+ printf("#endif\n");
/* Compute inverse table x^-1 == x^254 */
printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -104,6 +110,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfinv);\n");
+ printf("#endif\n");
/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
@@ -115,6 +124,9 @@ int main(int argc, char *argv[])
(j == 7) ? '\n' : ' ');
}
printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfexi);\n");
+ printf("#endif\n");
return 0;
}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index f6d08f24167..41ced0cbe82 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -19,7 +19,11 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/raid/multipath.h>
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "multipath.h"
#define MAX_WORK_PER_DISK 128
@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
+static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ WARN_ONCE(sectors || raid_disks,
+ "%s does not support generic reshape\n", __func__);
+
+ return mddev->dev_sectors;
+}
+
static int multipath_run (mddev_t *mddev)
{
multipath_conf_t *conf;
@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
- mddev->array_sectors = mddev->size * 2;
+ md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality =
.error_handler = multipath_error,
.hot_add_disk = multipath_add_disk,
.hot_remove_disk= multipath_remove_disk,
+ .size = multipath_size,
};
static int __init multipath_init (void)
diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h
new file mode 100644
index 00000000000..6fa70b400cd
--- /dev/null
+++ b/drivers/md/multipath.h
@@ -0,0 +1,40 @@
+#ifndef _MULTIPATH_H
+#define _MULTIPATH_H
+
+struct multipath_info {
+ mdk_rdev_t *rdev;
+};
+
+struct multipath_private_data {
+ mddev_t *mddev;
+ struct multipath_info *multipaths;
+ int raid_disks;
+ int working_disks;
+ spinlock_t device_lock;
+ struct list_head retry_list;
+
+ mempool_t *pool;
+};
+
+typedef struct multipath_private_data multipath_conf_t;
+
+/*
+ * this is the only point in the RAID code where we violate
+ * C type safety. mddev->private is an 'opaque' pointer.
+ */
+#define mddev_to_conf(mddev) ((multipath_conf_t *) mddev->private)
+
+/*
+ * this is our 'private' 'collective' MULTIPATH buffer head.
+ * it contains information about what kind of IO operations were started
+ * for this MULTIPATH operation, and about their status:
+ */
+
+struct multipath_bh {
+ mddev_t *mddev;
+ struct bio *master_bio;
+ struct bio bio;
+ int path;
+ struct list_head retry_list;
+};
+#endif
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c605ba80558..c08d7559be5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -18,7 +18,10 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/raid/raid0.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "raid0.h"
static void raid0_unplug(struct request_queue *q)
{
@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev)
list_for_each_entry(rdev2, &mddev->disks, same_set) {
printk(KERN_INFO "raid0: comparing %s(%llu)",
bdevname(rdev1->bdev,b),
- (unsigned long long)rdev1->size);
+ (unsigned long long)rdev1->sectors);
printk(KERN_INFO " with %s(%llu)\n",
bdevname(rdev2->bdev,b),
- (unsigned long long)rdev2->size);
+ (unsigned long long)rdev2->sectors);
if (rdev2 == rdev1) {
printk(KERN_INFO "raid0: END\n");
break;
}
- if (rdev2->size == rdev1->size)
- {
+ if (rdev2->sectors == rdev1->sectors) {
/*
* Not unique, don't count it as a new
* group
@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
- if (!smallest || (rdev1->size <smallest->size))
+ if (!smallest || (rdev1->sectors < smallest->sectors))
smallest = rdev1;
cnt++;
}
@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev)
goto abort;
}
zone->nb_dev = cnt;
- zone->sectors = smallest->size * cnt * 2;
+ zone->sectors = smallest->sectors * cnt;
zone->zone_start = 0;
- current_start = smallest->size * 2;
+ current_start = smallest->sectors;
curr_zone_start = zone->sectors;
/* now do the other zones */
@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev)
rdev = conf->strip_zone[0].dev[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
- if (rdev->size > current_start / 2) {
- printk(KERN_INFO " contained as device %d\n",
- c);
- zone->dev[c] = rdev;
- c++;
- if (!smallest || (rdev->size <smallest->size)) {
- smallest = rdev;
- printk(KERN_INFO " (%llu) is smallest!.\n",
- (unsigned long long)rdev->size);
- }
- } else
+ if (rdev->sectors <= current_start) {
printk(KERN_INFO " nope.\n");
+ continue;
+ }
+ printk(KERN_INFO " contained as device %d\n", c);
+ zone->dev[c] = rdev;
+ c++;
+ if (!smallest || rdev->sectors < smallest->sectors) {
+ smallest = rdev;
+ printk(KERN_INFO " (%llu) is smallest!.\n",
+ (unsigned long long)rdev->sectors);
+ }
}
zone->nb_dev = c;
- zone->sectors = (smallest->size * 2 - current_start) * c;
+ zone->sectors = (smallest->sectors - current_start) * c;
printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
zone->nb_dev, (unsigned long long)zone->sectors);
zone->zone_start = curr_zone_start;
curr_zone_start += zone->sectors;
- current_start = smallest->size * 2;
+ current_start = smallest->sectors;
printk(KERN_INFO "raid0: current zone start: %llu\n",
(unsigned long long)current_start);
}
@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q,
return max;
}
+static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ sector_t array_sectors = 0;
+ mdk_rdev_t *rdev;
+
+ WARN_ONCE(sectors || raid_disks,
+ "%s does not support generic reshape\n", __func__);
+
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ array_sectors += rdev->sectors;
+
+ return array_sectors;
+}
+
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
s64 sectors;
raid0_conf_t *conf;
- mdk_rdev_t *rdev;
if (mddev->chunk_size == 0) {
printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev)
goto out_free_conf;
/* calculate array device size */
- mddev->array_sectors = 0;
- list_for_each_entry(rdev, &mddev->disks, same_set)
- mddev->array_sectors += rdev->size * 2;
+ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors);
printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
(unsigned long long)conf->spacing);
{
- sector_t s = mddev->array_sectors;
+ sector_t s = raid0_size(mddev, 0, 0);
sector_t space = conf->spacing;
int round;
conf->sector_shift = 0;
@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality=
.run = raid0_run,
.stop = raid0_stop,
.status = raid0_status,
+ .size = raid0_size,
};
static int __init raid0_init (void)
diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h
new file mode 100644
index 00000000000..824b12eb1d4
--- /dev/null
+++ b/drivers/md/raid0.h
@@ -0,0 +1,28 @@
+#ifndef _RAID0_H
+#define _RAID0_H
+
+struct strip_zone
+{
+ sector_t zone_start; /* Zone offset in md_dev (in sectors) */
+ sector_t dev_start; /* Zone offset in real dev (in sectors) */
+ sector_t sectors; /* Zone size in sectors */
+ int nb_dev; /* # of devices attached to the zone */
+ mdk_rdev_t **dev; /* Devices attached to the zone */
+};
+
+struct raid0_private_data
+{
+ struct strip_zone **hash_table; /* Table of indexes into strip_zone */
+ struct strip_zone *strip_zone;
+ mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
+ int nr_strip_zones;
+
+ sector_t spacing;
+ int sector_shift; /* shift this before divide by spacing */
+};
+
+typedef struct raid0_private_data raid0_conf_t;
+
+#define mddev_to_conf(mddev) ((raid0_conf_t *) mddev->private)
+
+#endif
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e2466425d9c..b4f4badc006 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -31,10 +31,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include "dm-bio-list.h"
#include <linux/delay.h>
-#include <linux/raid/raid1.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "dm-bio-list.h"
+#include "raid1.h"
+#include "bitmap.h"
#define DEBUG 0
#if DEBUG
@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
}
- max_sector = mddev->size << 1;
+ max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return nr_sectors;
}
+static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ if (sectors)
+ return sectors;
+
+ return mddev->dev_sectors;
+}
+
static int run(mddev_t *mddev)
{
conf_t *conf;
@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
- mddev->array_sectors = mddev->size * 2;
+ md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev)
/* need to kick something here to make sure I/O goes? */
}
+ raise_barrier(conf);
+ lower_barrier(conf);
+
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
- mddev->array_sectors = sectors;
+ md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
+ if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
+ return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
- if (mddev->array_sectors / 2 > mddev->size &&
+ if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) {
- mddev->recovery_cp = mddev->size << 1;
+ mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
- mddev->size = mddev->array_sectors / 2;
+ mddev->dev_sectors = sectors;
mddev->resync_max_sectors = sectors;
return 0;
}
@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality =
.spare_active = raid1_spare_active,
.sync_request = sync_request,
.resize = raid1_resize,
+ .size = raid1_size,
.check_reshape = raid1_reshape,
.quiesce = raid1_quiesce,
};
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
new file mode 100644
index 00000000000..1620eea3d57
--- /dev/null
+++ b/drivers/md/raid1.h
@@ -0,0 +1,132 @@
+#ifndef _RAID1_H
+#define _RAID1_H
+
+typedef struct mirror_info mirror_info_t;
+
+struct mirror_info {
+ mdk_rdev_t *rdev;
+ sector_t head_position;
+};
+
+/*
+ * memory pools need a pointer to the mddev, so they can force an unplug
+ * when memory is tight, and a count of the number of drives that the
+ * pool was allocated for, so they know how much to allocate and free.
+ * mddev->raid_disks cannot be used, as it can change while a pool is active
+ * These two datums are stored in a kmalloced struct.
+ */
+
+struct pool_info {
+ mddev_t *mddev;
+ int raid_disks;
+};
+
+
+typedef struct r1bio_s r1bio_t;
+
+struct r1_private_data_s {
+ mddev_t *mddev;
+ mirror_info_t *mirrors;
+ int raid_disks;
+ int last_used;
+ sector_t next_seq_sect;
+ spinlock_t device_lock;
+
+ struct list_head retry_list;
+ /* queue pending writes and submit them on unplug */
+ struct bio_list pending_bio_list;
+ /* queue of writes that have been unplugged */
+ struct bio_list flushing_bio_list;
+
+ /* for use when syncing mirrors: */
+
+ spinlock_t resync_lock;
+ int nr_pending;
+ int nr_waiting;
+ int nr_queued;
+ int barrier;
+ sector_t next_resync;
+ int fullsync; /* set to 1 if a full sync is needed,
+ * (fresh device added).
+ * Cleared when a sync completes.
+ */
+
+ wait_queue_head_t wait_barrier;
+
+ struct pool_info *poolinfo;
+
+ struct page *tmppage;
+
+ mempool_t *r1bio_pool;
+ mempool_t *r1buf_pool;
+};
+
+typedef struct r1_private_data_s conf_t;
+
+/*
+ * this is the only point in the RAID code where we violate
+ * C type safety. mddev->private is an 'opaque' pointer.
+ */
+#define mddev_to_conf(mddev) ((conf_t *) mddev->private)
+
+/*
+ * this is our 'private' RAID1 bio.
+ *
+ * it contains information about what kind of IO operations were started
+ * for this RAID1 operation, and about their status:
+ */
+
+struct r1bio_s {
+ atomic_t remaining; /* 'have we finished' count,
+ * used from IRQ handlers
+ */
+ atomic_t behind_remaining; /* number of write-behind ios remaining
+ * in this BehindIO request
+ */
+ sector_t sector;
+ int sectors;
+ unsigned long state;
+ mddev_t *mddev;
+ /*
+ * original bio going to /dev/mdx
+ */
+ struct bio *master_bio;
+ /*
+ * if the IO is in READ direction, then this is where we read
+ */
+ int read_disk;
+
+ struct list_head retry_list;
+ struct bitmap_update *bitmap_update;
+ /*
+ * if the IO is in WRITE direction, then multiple bios are used.
+ * We choose the number when they are allocated.
+ */
+ struct bio *bios[0];
+ /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
+};
+
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
+/* bits for r1bio.state */
+#define R1BIO_Uptodate 0
+#define R1BIO_IsSync 1
+#define R1BIO_Degraded 2
+#define R1BIO_BehindIO 3
+#define R1BIO_Barrier 4
+#define R1BIO_BarrierRetry 5
+/* For write-behind requests, we call bi_end_io when
+ * the last non-write-behind device completes, providing
+ * any write was successful. Otherwise we call when
+ * any write-behind write succeeds, otherwise we call
+ * with failure when last write completes (and all failed).
+ * Record that bi_end_io was called with this flag...
+ */
+#define R1BIO_Returned 6
+
+#endif
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7301631abe0..e293d92641a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,10 +18,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include "dm-bio-list.h"
#include <linux/delay.h>
-#include <linux/raid/raid10.h>
-#include <linux/raid/bitmap.h>
+#include <linux/blkdev.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "dm-bio-list.h"
+#include "raid10.h"
+#include "bitmap.h"
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
skipped:
- max_sector = mddev->size << 1;
+ max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
goto skipped;
}
+static sector_t
+raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ sector_t size;
+ conf_t *conf = mddev_to_conf(mddev);
+
+ if (!raid_disks)
+ raid_disks = mddev->raid_disks;
+ if (!sectors)
+ sectors = mddev->dev_sectors;
+
+ size = sectors >> conf->chunk_shift;
+ sector_div(size, conf->far_copies);
+ size = size * raid_disks;
+ sector_div(size, conf->near_copies);
+
+ return size << conf->chunk_shift;
+}
+
static int run(mddev_t *mddev)
{
conf_t *conf;
@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev)
conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
- size = mddev->size >> (conf->chunk_shift-1);
+ size = mddev->dev_sectors >> conf->chunk_shift;
sector_div(size, fc);
size = size * conf->raid_disks;
sector_div(size, nc);
@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev)
*/
stride += conf->raid_disks - 1;
sector_div(stride, conf->raid_disks);
- mddev->size = stride << (conf->chunk_shift-1);
+ mddev->dev_sectors = stride << conf->chunk_shift;
if (fo)
stride = 1;
@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
- mddev->array_sectors = size << conf->chunk_shift;
- mddev->resync_max_sectors = size << conf->chunk_shift;
+ md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
+ mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev)
{
conf_t *conf = mddev_to_conf(mddev);
+ raise_barrier(conf, 0);
+ lower_barrier(conf);
+
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality =
.spare_active = raid10_spare_active,
.sync_request = sync_request,
.quiesce = raid10_quiesce,
+ .size = raid10_size,
};
static int __init raid_init(void)
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
new file mode 100644
index 00000000000..244dbe507a5
--- /dev/null
+++ b/drivers/md/raid10.h
@@ -0,0 +1,121 @@
+#ifndef _RAID10_H
+#define _RAID10_H
+
+typedef struct mirror_info mirror_info_t;
+
+struct mirror_info {
+ mdk_rdev_t *rdev;
+ sector_t head_position;
+};
+
+typedef struct r10bio_s r10bio_t;
+
+struct r10_private_data_s {
+ mddev_t *mddev;
+ mirror_info_t *mirrors;
+ int raid_disks;
+ spinlock_t device_lock;
+
+ /* geometry */
+ int near_copies; /* number of copies layed out raid0 style */
+ int far_copies; /* number of copies layed out
+ * at large strides across drives
+ */
+ int far_offset; /* far_copies are offset by 1 stripe
+ * instead of many
+ */
+ int copies; /* near_copies * far_copies.
+ * must be <= raid_disks
+ */
+ sector_t stride; /* distance between far copies.
+ * This is size / far_copies unless
+ * far_offset, in which case it is
+ * 1 stripe.
+ */
+
+ int chunk_shift; /* shift from chunks to sectors */
+ sector_t chunk_mask;
+
+ struct list_head retry_list;
+ /* queue pending writes and submit them on unplug */
+ struct bio_list pending_bio_list;
+
+
+ spinlock_t resync_lock;
+ int nr_pending;
+ int nr_waiting;
+ int nr_queued;
+ int barrier;
+ sector_t next_resync;
+ int fullsync; /* set to 1 if a full sync is needed,
+ * (fresh device added).
+ * Cleared when a sync completes.
+ */
+
+ wait_queue_head_t wait_barrier;
+
+ mempool_t *r10bio_pool;
+ mempool_t *r10buf_pool;
+ struct page *tmppage;
+};
+
+typedef struct r10_private_data_s conf_t;
+
+/*
+ * this is the only point in the RAID code where we violate
+ * C type safety. mddev->private is an 'opaque' pointer.
+ */
+#define mddev_to_conf(mddev) ((conf_t *) mddev->private)
+
+/*
+ * this is our 'private' RAID10 bio.
+ *
+ * it contains information about what kind of IO operations were started
+ * for this RAID10 operation, and about their status:
+ */
+
+struct r10bio_s {
+ atomic_t remaining; /* 'have we finished' count,
+ * used from IRQ handlers
+ */
+ sector_t sector; /* virtual sector number */
+ int sectors;
+ unsigned long state;
+ mddev_t *mddev;
+ /*
+ * original bio going to /dev/mdx
+ */
+ struct bio *master_bio;
+ /*
+ * if the IO is in READ direction, then this is where we read
+ */
+ int read_slot;
+
+ struct list_head retry_list;
+ /*
+ * if the IO is in WRITE direction, then multiple bios are used,
+ * one for each copy.
+ * When resyncing we also use one for each copy.
+ * When reconstructing, we use 2 bios, one for read, one for write.
+ * We choose the number when they are allocated.
+ */
+ struct {
+ struct bio *bio;
+ sector_t addr;
+ int devnum;
+ } devs[0];
+};
+
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
+/* bits for r10bio.state */
+#define R10BIO_Uptodate 0
+#define R10BIO_IsSync 1
+#define R10BIO_IsRecover 2
+#define R10BIO_Degraded 3
+#endif
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a5ba080d303..3bbc6d64704 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -43,11 +43,14 @@
* miss any bits.
*/
+#include <linux/blkdev.h>
#include <linux/kthread.h>
-#include "raid6.h"
-
-#include <linux/raid/bitmap.h>
+#include <linux/raid/pq.h>
#include <linux/async_tx.h>
+#include <linux/seq_file.h>
+#include "md.h"
+#include "raid5.h"
+#include "bitmap.h"
/*
* Stripe cache
@@ -91,11 +94,6 @@
#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
-#if !RAID6_USE_EMPTY_ZERO_PAGE
-/* In .bss so it's zeroed */
-const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
-#endif
-
/*
* We maintain a biased count of active stripes in the bottom 16 bits of
* bi_phys_segments, and a count of processed stripes in the upper 16 bits
@@ -130,12 +128,42 @@ static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
}
+/* Find first data disk in a raid6 stripe */
+static inline int raid6_d0(struct stripe_head *sh)
+{
+ if (sh->ddf_layout)
+ /* ddf always start from first device */
+ return 0;
+ /* md starts just after Q block */
+ if (sh->qd_idx == sh->disks - 1)
+ return 0;
+ else
+ return sh->qd_idx + 1;
+}
static inline int raid6_next_disk(int disk, int raid_disks)
{
disk++;
return (disk < raid_disks) ? disk : 0;
}
+/* When walking through the disks in a raid5, starting at raid6_d0,
+ * We need to map each disk to a 'slot', where the data disks are slot
+ * 0 .. raid_disks-3, the parity disk is raid_disks-2 and the Q disk
+ * is raid_disks-1. This help does that mapping.
+ */
+static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
+ int *count, int syndrome_disks)
+{
+ int slot;
+
+ if (idx == sh->pd_idx)
+ return syndrome_disks;
+ if (idx == sh->qd_idx)
+ return syndrome_disks + 1;
+ slot = (*count)++;
+ return slot;
+}
+
static void return_io(struct bio *return_bi)
{
struct bio *bi = return_bi;
@@ -193,6 +221,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
}
}
}
+
static void release_stripe(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
@@ -270,9 +299,11 @@ static int grow_buffers(struct stripe_head *sh, int num)
return 0;
}
-static void raid5_build_block(struct stripe_head *sh, int i);
+static void raid5_build_block(struct stripe_head *sh, int i, int previous);
+static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
+ struct stripe_head *sh);
-static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int disks)
+static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
{
raid5_conf_t *conf = sh->raid_conf;
int i;
@@ -287,11 +318,12 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
remove_hash(sh);
+ sh->generation = conf->generation - previous;
+ sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
sh->sector = sector;
- sh->pd_idx = pd_idx;
+ stripe_set_idx(sector, conf, previous, sh);
sh->state = 0;
- sh->disks = disks;
for (i = sh->disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
@@ -305,12 +337,13 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
BUG();
}
dev->flags = 0;
- raid5_build_block(sh, i);
+ raid5_build_block(sh, i, previous);
}
insert_hash(conf, sh);
}
-static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, int disks)
+static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector,
+ short generation)
{
struct stripe_head *sh;
struct hlist_node *hn;
@@ -318,7 +351,7 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
CHECK_DEVLOCK();
pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
- if (sh->sector == sector && sh->disks == disks)
+ if (sh->sector == sector && sh->generation == generation)
return sh;
pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
return NULL;
@@ -327,8 +360,9 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
static void unplug_slaves(mddev_t *mddev);
static void raid5_unplug_device(struct request_queue *q);
-static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector, int disks,
- int pd_idx, int noblock)
+static struct stripe_head *
+get_active_stripe(raid5_conf_t *conf, sector_t sector,
+ int previous, int noblock)
{
struct stripe_head *sh;
@@ -340,7 +374,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
conf->device_lock, /* nothing */);
- sh = __find_stripe(conf, sector, disks);
+ sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
if (!conf->inactive_blocked)
sh = get_free_stripe(conf);
@@ -358,10 +392,11 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
);
conf->inactive_blocked = 0;
} else
- init_stripe(sh, sector, pd_idx, disks);
+ init_stripe(sh, sector, previous);
} else {
if (atomic_read(&sh->count)) {
- BUG_ON(!list_empty(&sh->lru));
+ BUG_ON(!list_empty(&sh->lru)
+ && !test_bit(STRIPE_EXPANDING, &sh->state));
} else {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
@@ -895,8 +930,10 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
- sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+ sprintf(conf->cache_name[0],
+ "raid%d-%s", conf->level, mdname(conf->mddev));
+ sprintf(conf->cache_name[1],
+ "raid%d-%s-alt", conf->level, mdname(conf->mddev));
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -911,7 +948,6 @@ static int grow_stripes(raid5_conf_t *conf, int num)
return 0;
}
-#ifdef CONFIG_MD_RAID5_RESHAPE
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
@@ -1036,7 +1072,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
conf->pool_size = newsize;
return err;
}
-#endif
static int drop_one_stripe(raid5_conf_t *conf)
{
@@ -1066,7 +1101,7 @@ static void shrink_stripes(raid5_conf_t *conf)
static void raid5_end_read_request(struct bio * bi, int error)
{
- struct stripe_head *sh = bi->bi_private;
+ struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1148,7 +1183,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
static void raid5_end_write_request(struct bio *bi, int error)
{
- struct stripe_head *sh = bi->bi_private;
+ struct stripe_head *sh = bi->bi_private;
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1176,9 +1211,9 @@ static void raid5_end_write_request(struct bio *bi, int error)
}
-static sector_t compute_blocknr(struct stripe_head *sh, int i);
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
-static void raid5_build_block(struct stripe_head *sh, int i)
+static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{
struct r5dev *dev = &sh->dev[i];
@@ -1194,7 +1229,7 @@ static void raid5_build_block(struct stripe_head *sh, int i)
dev->req.bi_private = sh;
dev->flags = 0;
- dev->sector = compute_blocknr(sh, i);
+ dev->sector = compute_blocknr(sh, i, previous);
}
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
@@ -1227,15 +1262,23 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
* Input: a 'big' sector number,
* Output: index of the data and parity disk, and the sector # in them.
*/
-static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
- unsigned int data_disks, unsigned int * dd_idx,
- unsigned int * pd_idx, raid5_conf_t *conf)
+static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
+ int previous, int *dd_idx,
+ struct stripe_head *sh)
{
long stripe;
unsigned long chunk_number;
unsigned int chunk_offset;
+ int pd_idx, qd_idx;
+ int ddf_layout = 0;
sector_t new_sector;
- int sectors_per_chunk = conf->chunk_size >> 9;
+ int algorithm = previous ? conf->prev_algo
+ : conf->algorithm;
+ int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+ : (conf->chunk_size >> 9);
+ int raid_disks = previous ? conf->previous_raid_disks
+ : conf->raid_disks;
+ int data_disks = raid_disks - conf->max_degraded;
/* First compute the information on this sector */
@@ -1259,68 +1302,170 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
/*
* Select the parity disk based on the user selected algorithm.
*/
+ pd_idx = qd_idx = ~0;
switch(conf->level) {
case 4:
- *pd_idx = data_disks;
+ pd_idx = data_disks;
break;
case 5:
- switch (conf->algorithm) {
+ switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
- *pd_idx = data_disks - stripe % raid_disks;
- if (*dd_idx >= *pd_idx)
+ pd_idx = data_disks - stripe % raid_disks;
+ if (*dd_idx >= pd_idx)
(*dd_idx)++;
break;
case ALGORITHM_RIGHT_ASYMMETRIC:
- *pd_idx = stripe % raid_disks;
- if (*dd_idx >= *pd_idx)
+ pd_idx = stripe % raid_disks;
+ if (*dd_idx >= pd_idx)
(*dd_idx)++;
break;
case ALGORITHM_LEFT_SYMMETRIC:
- *pd_idx = data_disks - stripe % raid_disks;
- *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
+ pd_idx = data_disks - stripe % raid_disks;
+ *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
break;
case ALGORITHM_RIGHT_SYMMETRIC:
- *pd_idx = stripe % raid_disks;
- *dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
+ pd_idx = stripe % raid_disks;
+ *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+ break;
+ case ALGORITHM_PARITY_0:
+ pd_idx = 0;
+ (*dd_idx)++;
+ break;
+ case ALGORITHM_PARITY_N:
+ pd_idx = data_disks;
break;
default:
printk(KERN_ERR "raid5: unsupported algorithm %d\n",
- conf->algorithm);
+ algorithm);
+ BUG();
}
break;
case 6:
- /**** FIX THIS ****/
- switch (conf->algorithm) {
+ switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
- *pd_idx = raid_disks - 1 - (stripe % raid_disks);
- if (*pd_idx == raid_disks-1)
- (*dd_idx)++; /* Q D D D P */
- else if (*dd_idx >= *pd_idx)
+ pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ qd_idx = pd_idx + 1;
+ if (pd_idx == raid_disks-1) {
+ (*dd_idx)++; /* Q D D D P */
+ qd_idx = 0;
+ } else if (*dd_idx >= pd_idx)
(*dd_idx) += 2; /* D D P Q D */
break;
case ALGORITHM_RIGHT_ASYMMETRIC:
- *pd_idx = stripe % raid_disks;
- if (*pd_idx == raid_disks-1)
- (*dd_idx)++; /* Q D D D P */
- else if (*dd_idx >= *pd_idx)
+ pd_idx = stripe % raid_disks;
+ qd_idx = pd_idx + 1;
+ if (pd_idx == raid_disks-1) {
+ (*dd_idx)++; /* Q D D D P */
+ qd_idx = 0;
+ } else if (*dd_idx >= pd_idx)
(*dd_idx) += 2; /* D D P Q D */
break;
case ALGORITHM_LEFT_SYMMETRIC:
- *pd_idx = raid_disks - 1 - (stripe % raid_disks);
- *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
+ pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ qd_idx = (pd_idx + 1) % raid_disks;
+ *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
break;
case ALGORITHM_RIGHT_SYMMETRIC:
- *pd_idx = stripe % raid_disks;
- *dd_idx = (*pd_idx + 2 + *dd_idx) % raid_disks;
+ pd_idx = stripe % raid_disks;
+ qd_idx = (pd_idx + 1) % raid_disks;
+ *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
+ break;
+
+ case ALGORITHM_PARITY_0:
+ pd_idx = 0;
+ qd_idx = 1;
+ (*dd_idx) += 2;
+ break;
+ case ALGORITHM_PARITY_N:
+ pd_idx = data_disks;
+ qd_idx = data_disks + 1;
break;
+
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ /* Exactly the same as RIGHT_ASYMMETRIC, but or
+ * of blocks for computing Q is different.
+ */
+ pd_idx = stripe % raid_disks;
+ qd_idx = pd_idx + 1;
+ if (pd_idx == raid_disks-1) {
+ (*dd_idx)++; /* Q D D D P */
+ qd_idx = 0;
+ } else if (*dd_idx >= pd_idx)
+ (*dd_idx) += 2; /* D D P Q D */
+ ddf_layout = 1;
+ break;
+
+ case ALGORITHM_ROTATING_N_RESTART:
+ /* Same a left_asymmetric, by first stripe is
+ * D D D P Q rather than
+ * Q D D D P
+ */
+ pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
+ qd_idx = pd_idx + 1;
+ if (pd_idx == raid_disks-1) {
+ (*dd_idx)++; /* Q D D D P */
+ qd_idx = 0;
+ } else if (*dd_idx >= pd_idx)
+ (*dd_idx) += 2; /* D D P Q D */
+ ddf_layout = 1;
+ break;
+
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ /* Same as left_symmetric but Q is before P */
+ pd_idx = raid_disks - 1 - (stripe % raid_disks);
+ qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
+ *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+ ddf_layout = 1;
+ break;
+
+ case ALGORITHM_LEFT_ASYMMETRIC_6:
+ /* RAID5 left_asymmetric, with Q on last device */
+ pd_idx = data_disks - stripe % (raid_disks-1);
+ if (*dd_idx >= pd_idx)
+ (*dd_idx)++;
+ qd_idx = raid_disks - 1;
+ break;
+
+ case ALGORITHM_RIGHT_ASYMMETRIC_6:
+ pd_idx = stripe % (raid_disks-1);
+ if (*dd_idx >= pd_idx)
+ (*dd_idx)++;
+ qd_idx = raid_disks - 1;
+ break;
+
+ case ALGORITHM_LEFT_SYMMETRIC_6:
+ pd_idx = data_disks - stripe % (raid_disks-1);
+ *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+ qd_idx = raid_disks - 1;
+ break;
+
+ case ALGORITHM_RIGHT_SYMMETRIC_6:
+ pd_idx = stripe % (raid_disks-1);
+ *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+ qd_idx = raid_disks - 1;
+ break;
+
+ case ALGORITHM_PARITY_0_6:
+ pd_idx = 0;
+ (*dd_idx)++;
+ qd_idx = raid_disks - 1;
+ break;
+
+
default:
printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
- conf->algorithm);
+ algorithm);
+ BUG();
}
break;
}
+ if (sh) {
+ sh->pd_idx = pd_idx;
+ sh->qd_idx = qd_idx;
+ sh->ddf_layout = ddf_layout;
+ }
/*
* Finally, compute the new sector number
*/
@@ -1329,17 +1474,21 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
}
-static sector_t compute_blocknr(struct stripe_head *sh, int i)
+static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
{
raid5_conf_t *conf = sh->raid_conf;
int raid_disks = sh->disks;
int data_disks = raid_disks - conf->max_degraded;
sector_t new_sector = sh->sector, check;
- int sectors_per_chunk = conf->chunk_size >> 9;
+ int sectors_per_chunk = previous ? (conf->prev_chunk >> 9)
+ : (conf->chunk_size >> 9);
+ int algorithm = previous ? conf->prev_algo
+ : conf->algorithm;
sector_t stripe;
int chunk_offset;
- int chunk_number, dummy1, dummy2, dd_idx = i;
+ int chunk_number, dummy1, dd_idx = i;
sector_t r_sector;
+ struct stripe_head sh2;
chunk_offset = sector_div(new_sector, sectors_per_chunk);
@@ -1351,7 +1500,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
switch(conf->level) {
case 4: break;
case 5:
- switch (conf->algorithm) {
+ switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
case ALGORITHM_RIGHT_ASYMMETRIC:
if (i > sh->pd_idx)
@@ -1363,19 +1512,27 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
i += raid_disks;
i -= (sh->pd_idx + 1);
break;
+ case ALGORITHM_PARITY_0:
+ i -= 1;
+ break;
+ case ALGORITHM_PARITY_N:
+ break;
default:
printk(KERN_ERR "raid5: unsupported algorithm %d\n",
- conf->algorithm);
+ algorithm);
+ BUG();
}
break;
case 6:
- if (i == raid6_next_disk(sh->pd_idx, raid_disks))
+ if (i == sh->qd_idx)
return 0; /* It is the Q disk */
- switch (conf->algorithm) {
+ switch (algorithm) {
case ALGORITHM_LEFT_ASYMMETRIC:
case ALGORITHM_RIGHT_ASYMMETRIC:
- if (sh->pd_idx == raid_disks-1)
- i--; /* Q D D D P */
+ case ALGORITHM_ROTATING_ZERO_RESTART:
+ case ALGORITHM_ROTATING_N_RESTART:
+ if (sh->pd_idx == raid_disks-1)
+ i--; /* Q D D D P */
else if (i > sh->pd_idx)
i -= 2; /* D D P Q D */
break;
@@ -1390,9 +1547,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
i -= (sh->pd_idx + 2);
}
break;
+ case ALGORITHM_PARITY_0:
+ i -= 2;
+ break;
+ case ALGORITHM_PARITY_N:
+ break;
+ case ALGORITHM_ROTATING_N_CONTINUE:
+ if (sh->pd_idx == 0)
+ i--; /* P D D D Q */
+ else if (i > sh->pd_idx)
+ i -= 2; /* D D Q P D */
+ break;
+ case ALGORITHM_LEFT_ASYMMETRIC_6:
+ case ALGORITHM_RIGHT_ASYMMETRIC_6:
+ if (i > sh->pd_idx)
+ i--;
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC_6:
+ case ALGORITHM_RIGHT_SYMMETRIC_6:
+ if (i < sh->pd_idx)
+ i += data_disks + 1;
+ i -= (sh->pd_idx + 1);
+ break;
+ case ALGORITHM_PARITY_0_6:
+ i -= 1;
+ break;
default:
printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
- conf->algorithm);
+ algorithm);
+ BUG();
}
break;
}
@@ -1400,8 +1583,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
chunk_number = stripe * data_disks + i;
r_sector = (sector_t)chunk_number * sectors_per_chunk + chunk_offset;
- check = raid5_compute_sector(r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
- if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
+ check = raid5_compute_sector(conf, r_sector,
+ previous, &dummy1, &sh2);
+ if (check != sh->sector || dummy1 != dd_idx || sh2.pd_idx != sh->pd_idx
+ || sh2.qd_idx != sh->qd_idx) {
printk(KERN_ERR "compute_blocknr: map not correct\n");
return 0;
}
@@ -1468,14 +1653,16 @@ static void copy_data(int frombio, struct bio *bio,
static void compute_parity6(struct stripe_head *sh, int method)
{
- raid6_conf_t *conf = sh->raid_conf;
- int i, pd_idx = sh->pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+ raid5_conf_t *conf = sh->raid_conf;
+ int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
+ int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
struct bio *chosen;
/**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[disks];
+ void *ptrs[syndrome_disks+2];
- qd_idx = raid6_next_disk(pd_idx, disks);
- d0_idx = raid6_next_disk(qd_idx, disks);
+ pd_idx = sh->pd_idx;
+ qd_idx = sh->qd_idx;
+ d0_idx = raid6_d0(sh);
pr_debug("compute_parity, stripe %llu, method %d\n",
(unsigned long long)sh->sector, method);
@@ -1513,24 +1700,29 @@ static void compute_parity6(struct stripe_head *sh, int method)
set_bit(R5_UPTODATE, &sh->dev[i].flags);
}
-// switch(method) {
-// case RECONSTRUCT_WRITE:
-// case CHECK_PARITY:
-// case UPDATE_PARITY:
- /* Note that unlike RAID-5, the ordering of the disks matters greatly. */
- /* FIX: Is this ordering of drives even remotely optimal? */
- count = 0;
- i = d0_idx;
- do {
- ptrs[count++] = page_address(sh->dev[i].page);
- if (count <= disks-2 && !test_bit(R5_UPTODATE, &sh->dev[i].flags))
- printk("block %d/%d not uptodate on parity calc\n", i,count);
- i = raid6_next_disk(i, disks);
- } while ( i != d0_idx );
-// break;
-// }
-
- raid6_call.gen_syndrome(disks, STRIPE_SIZE, ptrs);
+ /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
+
+ for (i = 0; i < disks; i++)
+ ptrs[i] = (void *)raid6_empty_zero_page;
+
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ ptrs[slot] = page_address(sh->dev[i].page);
+ if (slot < syndrome_disks &&
+ !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
+ printk(KERN_ERR "block %d/%d not uptodate "
+ "on parity calc\n", i, count);
+ BUG();
+ }
+
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
+
+ raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
switch(method) {
case RECONSTRUCT_WRITE:
@@ -1552,8 +1744,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
{
int i, count, disks = sh->disks;
void *ptr[MAX_XOR_BLOCKS], *dest, *p;
- int pd_idx = sh->pd_idx;
- int qd_idx = raid6_next_disk(pd_idx, disks);
+ int qd_idx = sh->qd_idx;
pr_debug("compute_block_1, stripe %llu, idx %d\n",
(unsigned long long)sh->sector, dd_idx);
@@ -1589,63 +1780,65 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
{
int i, count, disks = sh->disks;
- int pd_idx = sh->pd_idx;
- int qd_idx = raid6_next_disk(pd_idx, disks);
- int d0_idx = raid6_next_disk(qd_idx, disks);
- int faila, failb;
+ int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+ int d0_idx = raid6_d0(sh);
+ int faila = -1, failb = -1;
+ /**** FIX THIS: This could be very bad if disks is close to 256 ****/
+ void *ptrs[syndrome_disks+2];
- /* faila and failb are disk numbers relative to d0_idx */
- /* pd_idx become disks-2 and qd_idx become disks-1 */
- faila = (dd_idx1 < d0_idx) ? dd_idx1+(disks-d0_idx) : dd_idx1-d0_idx;
- failb = (dd_idx2 < d0_idx) ? dd_idx2+(disks-d0_idx) : dd_idx2-d0_idx;
+ for (i = 0; i < disks ; i++)
+ ptrs[i] = (void *)raid6_empty_zero_page;
+ count = 0;
+ i = d0_idx;
+ do {
+ int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+ ptrs[slot] = page_address(sh->dev[i].page);
+
+ if (i == dd_idx1)
+ faila = slot;
+ if (i == dd_idx2)
+ failb = slot;
+ i = raid6_next_disk(i, disks);
+ } while (i != d0_idx);
+ BUG_ON(count != syndrome_disks);
BUG_ON(faila == failb);
if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
- (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
+ (unsigned long long)sh->sector, dd_idx1, dd_idx2,
+ faila, failb);
- if ( failb == disks-1 ) {
+ if (failb == syndrome_disks+1) {
/* Q disk is one of the missing disks */
- if ( faila == disks-2 ) {
+ if (faila == syndrome_disks) {
/* Missing P+Q, just recompute */
compute_parity6(sh, UPDATE_PARITY);
return;
} else {
/* We're missing D+Q; recompute D from P */
- compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
+ compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
+ dd_idx2 : dd_idx1),
+ 0);
compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
return;
}
}
- /* We're missing D+P or D+D; build pointer table */
- {
- /**** FIX THIS: This could be very bad if disks is close to 256 ****/
- void *ptrs[disks];
-
- count = 0;
- i = d0_idx;
- do {
- ptrs[count++] = page_address(sh->dev[i].page);
- i = raid6_next_disk(i, disks);
- if (i != dd_idx1 && i != dd_idx2 &&
- !test_bit(R5_UPTODATE, &sh->dev[i].flags))
- printk("compute_2 with missing block %d/%d\n", count, i);
- } while ( i != d0_idx );
-
- if ( failb == disks-2 ) {
- /* We're missing D+P. */
- raid6_datap_recov(disks, STRIPE_SIZE, faila, ptrs);
- } else {
- /* We're missing D+D. */
- raid6_2data_recov(disks, STRIPE_SIZE, faila, failb, ptrs);
- }
-
- /* Both the above update both missing blocks */
- set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
- set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
+ /* We're missing D+P or D+D; */
+ if (failb == syndrome_disks) {
+ /* We're missing D+P. */
+ raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
+ } else {
+ /* We're missing D+D. */
+ raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
+ ptrs);
}
+
+ /* Both the above update both missing blocks */
+ set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
+ set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
}
static void
@@ -1800,17 +1993,21 @@ static int page_is_zero(struct page *p)
memcmp(a, a+4, STRIPE_SIZE-4)==0);
}
-static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
+static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
+ struct stripe_head *sh)
{
- int sectors_per_chunk = conf->chunk_size >> 9;
- int pd_idx, dd_idx;
+ int sectors_per_chunk =
+ previous ? (conf->prev_chunk >> 9)
+ : (conf->chunk_size >> 9);
+ int dd_idx;
int chunk_offset = sector_div(stripe, sectors_per_chunk);
+ int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
- raid5_compute_sector(stripe * (disks - conf->max_degraded)
+ raid5_compute_sector(conf,
+ stripe * (disks - conf->max_degraded)
*sectors_per_chunk + chunk_offset,
- disks, disks - conf->max_degraded,
- &dd_idx, &pd_idx, conf);
- return pd_idx;
+ previous,
+ &dd_idx, sh);
}
static void
@@ -2181,7 +2378,7 @@ static void handle_stripe_dirtying6(raid5_conf_t *conf,
struct r6_state *r6s, int disks)
{
int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
- int qd_idx = r6s->qd_idx;
+ int qd_idx = sh->qd_idx;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
/* Would I have to read this buffer for reconstruct_write */
@@ -2371,7 +2568,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
int update_p = 0, update_q = 0;
struct r5dev *dev;
int pd_idx = sh->pd_idx;
- int qd_idx = r6s->qd_idx;
+ int qd_idx = sh->qd_idx;
set_bit(STRIPE_HANDLE, &sh->state);
@@ -2467,17 +2664,14 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
struct dma_async_tx_descriptor *tx = NULL;
clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
for (i = 0; i < sh->disks; i++)
- if (i != sh->pd_idx && (!r6s || i != r6s->qd_idx)) {
- int dd_idx, pd_idx, j;
+ if (i != sh->pd_idx && i != sh->qd_idx) {
+ int dd_idx, j;
struct stripe_head *sh2;
- sector_t bn = compute_blocknr(sh, i);
- sector_t s = raid5_compute_sector(bn, conf->raid_disks,
- conf->raid_disks -
- conf->max_degraded, &dd_idx,
- &pd_idx, conf);
- sh2 = get_active_stripe(conf, s, conf->raid_disks,
- pd_idx, 1);
+ sector_t bn = compute_blocknr(sh, i, 1);
+ sector_t s = raid5_compute_sector(conf, bn, 0,
+ &dd_idx, NULL);
+ sh2 = get_active_stripe(conf, s, 0, 1);
if (sh2 == NULL)
/* so far only the early blocks of this stripe
* have been requested. When later blocks
@@ -2500,8 +2694,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
for (j = 0; j < conf->raid_disks; j++)
if (j != sh2->pd_idx &&
- (!r6s || j != raid6_next_disk(sh2->pd_idx,
- sh2->disks)) &&
+ (!r6s || j != sh2->qd_idx) &&
!test_bit(R5_Expanded, &sh2->dev[j].flags))
break;
if (j == conf->raid_disks) {
@@ -2750,6 +2943,23 @@ static bool handle_stripe5(struct stripe_head *sh)
/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
+ struct stripe_head *sh2
+ = get_active_stripe(conf, sh->sector, 1, 1);
+ if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+ /* sh cannot be written until sh2 has been read.
+ * so arrange for sh to be delayed a little
+ */
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+ &sh2->state))
+ atomic_inc(&conf->preread_active_stripes);
+ release_stripe(sh2);
+ goto unlock;
+ }
+ if (sh2)
+ release_stripe(sh2);
+
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
for (i = conf->raid_disks; i--; ) {
@@ -2763,8 +2973,7 @@ static bool handle_stripe5(struct stripe_head *sh)
!sh->reconstruct_state) {
/* Need to write out all blocks after computing parity */
sh->disks = conf->raid_disks;
- sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
- conf->raid_disks);
+ stripe_set_idx(sh->sector, conf, 0, sh);
schedule_reconstruction5(sh, &s, 1, 1);
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
@@ -2796,20 +3005,19 @@ static bool handle_stripe5(struct stripe_head *sh)
static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
{
- raid6_conf_t *conf = sh->raid_conf;
+ raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
struct bio *return_bi = NULL;
- int i, pd_idx = sh->pd_idx;
+ int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx;
struct stripe_head_state s;
struct r6_state r6s;
struct r5dev *dev, *pdev, *qdev;
mdk_rdev_t *blocked_rdev = NULL;
- r6s.qd_idx = raid6_next_disk(pd_idx, disks);
pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
"pd_idx=%d, qd_idx=%d\n",
(unsigned long long)sh->sector, sh->state,
- atomic_read(&sh->count), pd_idx, r6s.qd_idx);
+ atomic_read(&sh->count), pd_idx, qd_idx);
memset(&s, 0, sizeof(s));
spin_lock(&sh->lock);
@@ -2920,9 +3128,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
pdev = &sh->dev[pd_idx];
r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
|| (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
- qdev = &sh->dev[r6s.qd_idx];
- r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
- || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
+ qdev = &sh->dev[qd_idx];
+ r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx)
+ || (s.failed >= 2 && r6s.failed_num[1] == qd_idx);
if ( s.written &&
( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
@@ -2980,10 +3188,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
}
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ struct stripe_head *sh2
+ = get_active_stripe(conf, sh->sector, 1, 1);
+ if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
+ /* sh cannot be written until sh2 has been read.
+ * so arrange for sh to be delayed a little
+ */
+ set_bit(STRIPE_DELAYED, &sh->state);
+ set_bit(STRIPE_HANDLE, &sh->state);
+ if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
+ &sh2->state))
+ atomic_inc(&conf->preread_active_stripes);
+ release_stripe(sh2);
+ goto unlock;
+ }
+ if (sh2)
+ release_stripe(sh2);
+
/* Need to write out all blocks after computing P&Q */
sh->disks = conf->raid_disks;
- sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
- conf->raid_disks);
+ stripe_set_idx(sh->sector, conf, 0, sh);
compute_parity6(sh, RECONSTRUCT_WRITE);
for (i = conf->raid_disks ; i-- ; ) {
set_bit(R5_LOCKED, &sh->dev[i].flags);
@@ -3134,6 +3358,8 @@ static int raid5_mergeable_bvec(struct request_queue *q,
if ((bvm->bi_rw & 1) == WRITE)
return biovec->bv_len; /* always allow writes to be mergeable */
+ if (mddev->new_chunk < mddev->chunk_size)
+ chunk_sectors = mddev->new_chunk >> 9;
max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
if (max < 0) max = 0;
if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3149,6 +3375,8 @@ static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
unsigned int chunk_sectors = mddev->chunk_size >> 9;
unsigned int bio_sectors = bio->bi_size >> 9;
+ if (mddev->new_chunk < mddev->chunk_size)
+ chunk_sectors = mddev->new_chunk >> 9;
return chunk_sectors >=
((sector & (chunk_sectors - 1)) + bio_sectors);
}
@@ -3255,9 +3483,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
{
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev);
- const unsigned int raid_disks = conf->raid_disks;
- const unsigned int data_disks = raid_disks - conf->max_degraded;
- unsigned int dd_idx, pd_idx;
+ unsigned int dd_idx;
struct bio* align_bi;
mdk_rdev_t *rdev;
@@ -3266,7 +3492,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
return 0;
}
/*
- * use bio_clone to make a copy of the bio
+ * use bio_clone to make a copy of the bio
*/
align_bi = bio_clone(raid_bio, GFP_NOIO);
if (!align_bi)
@@ -3280,12 +3506,9 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
/*
* compute position
*/
- align_bi->bi_sector = raid5_compute_sector(raid_bio->bi_sector,
- raid_disks,
- data_disks,
- &dd_idx,
- &pd_idx,
- conf);
+ align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector,
+ 0,
+ &dd_idx, NULL);
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
@@ -3377,7 +3600,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
{
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev);
- unsigned int dd_idx, pd_idx;
+ int dd_idx;
sector_t new_sector;
sector_t logical_sector, last_sector;
struct stripe_head *sh;
@@ -3400,7 +3623,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
if (rw == READ &&
mddev->reshape_position == MaxSector &&
chunk_aligned_read(q,bi))
- return 0;
+ return 0;
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -3410,26 +3633,31 @@ static int make_request(struct request_queue *q, struct bio * bi)
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int disks, data_disks;
+ int previous;
retry:
+ previous = 0;
+ disks = conf->raid_disks;
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
- if (likely(conf->expand_progress == MaxSector))
- disks = conf->raid_disks;
- else {
- /* spinlock is needed as expand_progress may be
+ if (unlikely(conf->reshape_progress != MaxSector)) {
+ /* spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
* possible to see a half-updated value
- * Ofcourse expand_progress could change after
+ * Ofcourse reshape_progress could change after
* the lock is dropped, so once we get a reference
* to the stripe that we think it is, we will have
* to check again.
*/
spin_lock_irq(&conf->device_lock);
- disks = conf->raid_disks;
- if (logical_sector >= conf->expand_progress)
+ if (mddev->delta_disks < 0
+ ? logical_sector < conf->reshape_progress
+ : logical_sector >= conf->reshape_progress) {
disks = conf->previous_raid_disks;
- else {
- if (logical_sector >= conf->expand_lo) {
+ previous = 1;
+ } else {
+ if (mddev->delta_disks < 0
+ ? logical_sector < conf->reshape_safe
+ : logical_sector >= conf->reshape_safe) {
spin_unlock_irq(&conf->device_lock);
schedule();
goto retry;
@@ -3439,15 +3667,17 @@ static int make_request(struct request_queue *q, struct bio * bi)
}
data_disks = disks - conf->max_degraded;
- new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
- &dd_idx, &pd_idx, conf);
+ new_sector = raid5_compute_sector(conf, logical_sector,
+ previous,
+ &dd_idx, NULL);
pr_debug("raid5: make_request, sector %llu logical %llu\n",
(unsigned long long)new_sector,
(unsigned long long)logical_sector);
- sh = get_active_stripe(conf, new_sector, disks, pd_idx, (bi->bi_rw&RWA_MASK));
+ sh = get_active_stripe(conf, new_sector, previous,
+ (bi->bi_rw&RWA_MASK));
if (sh) {
- if (unlikely(conf->expand_progress != MaxSector)) {
+ if (unlikely(previous)) {
/* expansion might have moved on while waiting for a
* stripe, so we must do the range check again.
* Expansion could still move past after this
@@ -3458,8 +3688,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
*/
int must_retry = 0;
spin_lock_irq(&conf->device_lock);
- if (logical_sector < conf->expand_progress &&
- disks == conf->previous_raid_disks)
+ if (mddev->delta_disks < 0
+ ? logical_sector >= conf->reshape_progress
+ : logical_sector < conf->reshape_progress)
/* mismatch, need to try again */
must_retry = 1;
spin_unlock_irq(&conf->device_lock);
@@ -3514,6 +3745,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
return 0;
}
+static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
+
static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
{
/* reshaping is quite different to recovery/resync so it is
@@ -3527,61 +3760,118 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
*/
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
struct stripe_head *sh;
- int pd_idx;
sector_t first_sector, last_sector;
int raid_disks = conf->previous_raid_disks;
int data_disks = raid_disks - conf->max_degraded;
int new_data_disks = conf->raid_disks - conf->max_degraded;
int i;
int dd_idx;
- sector_t writepos, safepos, gap;
-
- if (sector_nr == 0 &&
- conf->expand_progress != 0) {
- /* restarting in the middle, skip the initial sectors */
- sector_nr = conf->expand_progress;
+ sector_t writepos, readpos, safepos;
+ sector_t stripe_addr;
+ int reshape_sectors;
+ struct list_head stripes;
+
+ if (sector_nr == 0) {
+ /* If restarting in the middle, skip the initial sectors */
+ if (mddev->delta_disks < 0 &&
+ conf->reshape_progress < raid5_size(mddev, 0, 0)) {
+ sector_nr = raid5_size(mddev, 0, 0)
+ - conf->reshape_progress;
+ } else if (mddev->delta_disks > 0 &&
+ conf->reshape_progress > 0)
+ sector_nr = conf->reshape_progress;
sector_div(sector_nr, new_data_disks);
- *skipped = 1;
- return sector_nr;
+ if (sector_nr) {
+ *skipped = 1;
+ return sector_nr;
+ }
}
+ /* We need to process a full chunk at a time.
+ * If old and new chunk sizes differ, we need to process the
+ * largest of these
+ */
+ if (mddev->new_chunk > mddev->chunk_size)
+ reshape_sectors = mddev->new_chunk / 512;
+ else
+ reshape_sectors = mddev->chunk_size / 512;
+
/* we update the metadata when there is more than 3Meg
* in the block range (that is rather arbitrary, should
* probably be time based) or when the data about to be
* copied would over-write the source of the data at
* the front of the range.
- * i.e. one new_stripe forward from expand_progress new_maps
- * to after where expand_lo old_maps to
+ * i.e. one new_stripe along from reshape_progress new_maps
+ * to after where reshape_safe old_maps to
*/
- writepos = conf->expand_progress +
- conf->chunk_size/512*(new_data_disks);
+ writepos = conf->reshape_progress;
sector_div(writepos, new_data_disks);
- safepos = conf->expand_lo;
+ readpos = conf->reshape_progress;
+ sector_div(readpos, data_disks);
+ safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
- gap = conf->expand_progress - conf->expand_lo;
+ if (mddev->delta_disks < 0) {
+ writepos -= reshape_sectors;
+ readpos += reshape_sectors;
+ safepos += reshape_sectors;
+ } else {
+ writepos += reshape_sectors;
+ readpos -= reshape_sectors;
+ safepos -= reshape_sectors;
+ }
- if (writepos >= safepos ||
- gap > (new_data_disks)*3000*2 /*3Meg*/) {
+ /* 'writepos' is the most advanced device address we might write.
+ * 'readpos' is the least advanced device address we might read.
+ * 'safepos' is the least address recorded in the metadata as having
+ * been reshaped.
+ * If 'readpos' is behind 'writepos', then there is no way that we can
+ * ensure safety in the face of a crash - that must be done by userspace
+ * making a backup of the data. So in that case there is no particular
+ * rush to update metadata.
+ * Otherwise if 'safepos' is behind 'writepos', then we really need to
+ * update the metadata to advance 'safepos' to match 'readpos' so that
+ * we can be safe in the event of a crash.
+ * So we insist on updating metadata if safepos is behind writepos and
+ * readpos is beyond writepos.
+ * In any case, update the metadata every 10 seconds.
+ * Maybe that number should be configurable, but I'm not sure it is
+ * worth it.... maybe it could be a multiple of safemode_delay???
+ */
+ if ((mddev->delta_disks < 0
+ ? (safepos > writepos && readpos < writepos)
+ : (safepos < writepos && readpos > writepos)) ||
+ time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0);
- mddev->reshape_position = conf->expand_progress;
+ mddev->reshape_position = conf->reshape_progress;
+ conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->flags == 0 ||
kthread_should_stop());
spin_lock_irq(&conf->device_lock);
- conf->expand_lo = mddev->reshape_position;
+ conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
}
- for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) {
+ if (mddev->delta_disks < 0) {
+ BUG_ON(conf->reshape_progress == 0);
+ stripe_addr = writepos;
+ BUG_ON((mddev->dev_sectors &
+ ~((sector_t)reshape_sectors - 1))
+ - reshape_sectors - stripe_addr
+ != sector_nr);
+ } else {
+ BUG_ON(writepos != sector_nr + reshape_sectors);
+ stripe_addr = sector_nr;
+ }
+ INIT_LIST_HEAD(&stripes);
+ for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
int skipped = 0;
- pd_idx = stripe_to_pdidx(sector_nr+i, conf, conf->raid_disks);
- sh = get_active_stripe(conf, sector_nr+i,
- conf->raid_disks, pd_idx, 0);
+ sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
set_bit(STRIPE_EXPANDING, &sh->state);
atomic_inc(&conf->reshape_stripes);
/* If any of this stripe is beyond the end of the old
@@ -3592,10 +3882,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
if (j == sh->pd_idx)
continue;
if (conf->level == 6 &&
- j == raid6_next_disk(sh->pd_idx, sh->disks))
+ j == sh->qd_idx)
continue;
- s = compute_blocknr(sh, j);
- if (s < mddev->array_sectors) {
+ s = compute_blocknr(sh, j, 0);
+ if (s < raid5_size(mddev, 0, 0)) {
skipped = 1;
continue;
}
@@ -3607,10 +3897,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
set_bit(STRIPE_EXPAND_READY, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
- release_stripe(sh);
+ list_add(&sh->lru, &stripes);
}
spin_lock_irq(&conf->device_lock);
- conf->expand_progress = (sector_nr + i) * new_data_disks;
+ if (mddev->delta_disks < 0)
+ conf->reshape_progress -= reshape_sectors * new_data_disks;
+ else
+ conf->reshape_progress += reshape_sectors * new_data_disks;
spin_unlock_irq(&conf->device_lock);
/* Ok, those stripe are ready. We can start scheduling
* reads on the source stripes.
@@ -3618,46 +3911,50 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
* block on the destination stripes.
*/
first_sector =
- raid5_compute_sector(sector_nr*(new_data_disks),
- raid_disks, data_disks,
- &dd_idx, &pd_idx, conf);
+ raid5_compute_sector(conf, stripe_addr*(new_data_disks),
+ 1, &dd_idx, NULL);
last_sector =
- raid5_compute_sector((sector_nr+conf->chunk_size/512)
- *(new_data_disks) -1,
- raid_disks, data_disks,
- &dd_idx, &pd_idx, conf);
- if (last_sector >= (mddev->size<<1))
- last_sector = (mddev->size<<1)-1;
+ raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+ *(new_data_disks) - 1),
+ 1, &dd_idx, NULL);
+ if (last_sector >= mddev->dev_sectors)
+ last_sector = mddev->dev_sectors - 1;
while (first_sector <= last_sector) {
- pd_idx = stripe_to_pdidx(first_sector, conf,
- conf->previous_raid_disks);
- sh = get_active_stripe(conf, first_sector,
- conf->previous_raid_disks, pd_idx, 0);
+ sh = get_active_stripe(conf, first_sector, 1, 0);
set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
first_sector += STRIPE_SECTORS;
}
+ /* Now that the sources are clearly marked, we can release
+ * the destination stripes
+ */
+ while (!list_empty(&stripes)) {
+ sh = list_entry(stripes.next, struct stripe_head, lru);
+ list_del_init(&sh->lru);
+ release_stripe(sh);
+ }
/* If this takes us to the resync_max point where we have to pause,
* then we need to write out the superblock.
*/
- sector_nr += conf->chunk_size>>9;
+ sector_nr += reshape_sectors;
if (sector_nr >= mddev->resync_max) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0);
- mddev->reshape_position = conf->expand_progress;
+ mddev->reshape_position = conf->reshape_progress;
+ conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags)
|| kthread_should_stop());
spin_lock_irq(&conf->device_lock);
- conf->expand_lo = mddev->reshape_position;
+ conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
}
- return conf->chunk_size>>9;
+ return reshape_sectors;
}
/* FIXME go_faster isn't used */
@@ -3665,9 +3962,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
{
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
struct stripe_head *sh;
- int pd_idx;
- int raid_disks = conf->raid_disks;
- sector_t max_sector = mddev->size << 1;
+ sector_t max_sector = mddev->dev_sectors;
int sync_blocks;
int still_degraded = 0;
int i;
@@ -3675,6 +3970,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
if (sector_nr >= max_sector) {
/* just being told to finish up .. nothing much to do */
unplug_slaves(mddev);
+
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
end_reshape(conf);
return 0;
@@ -3705,7 +4001,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
*/
if (mddev->degraded >= conf->max_degraded &&
test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- sector_t rv = (mddev->size << 1) - sector_nr;
+ sector_t rv = mddev->dev_sectors - sector_nr;
*skipped = 1;
return rv;
}
@@ -3721,10 +4017,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
bitmap_cond_end_sync(mddev->bitmap, sector_nr);
- pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
- sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
+ sh = get_active_stripe(conf, sector_nr, 0, 1);
if (sh == NULL) {
- sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 0);
+ sh = get_active_stripe(conf, sector_nr, 0, 0);
/* make sure we don't swamp the stripe cache if someone else
* is trying to get access
*/
@@ -3766,19 +4061,15 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
* it will be only one 'dd_idx' and only need one call to raid5_compute_sector.
*/
struct stripe_head *sh;
- int dd_idx, pd_idx;
+ int dd_idx;
sector_t sector, logical_sector, last_sector;
int scnt = 0;
int remaining;
int handled = 0;
logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
- sector = raid5_compute_sector( logical_sector,
- conf->raid_disks,
- conf->raid_disks - conf->max_degraded,
- &dd_idx,
- &pd_idx,
- conf);
+ sector = raid5_compute_sector(conf, logical_sector,
+ 0, &dd_idx, NULL);
last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
for (; logical_sector < last_sector;
@@ -3790,7 +4081,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
/* already done this stripe */
continue;
- sh = get_active_stripe(conf, sector, conf->raid_disks, pd_idx, 1);
+ sh = get_active_stripe(conf, sector, 0, 1);
if (!sh) {
/* failed to get a stripe - must wait */
@@ -3992,89 +4283,69 @@ static struct attribute_group raid5_attrs_group = {
.attrs = raid5_attrs,
};
-static int run(mddev_t *mddev)
+static sector_t
+raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
+{
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+
+ if (!sectors)
+ sectors = mddev->dev_sectors;
+ if (!raid_disks) {
+ /* size is defined by the smallest of previous and new size */
+ if (conf->raid_disks < conf->previous_raid_disks)
+ raid_disks = conf->raid_disks;
+ else
+ raid_disks = conf->previous_raid_disks;
+ }
+
+ sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
+ sectors &= ~((sector_t)mddev->new_chunk/512 - 1);
+ return sectors * (raid_disks - conf->max_degraded);
+}
+
+static raid5_conf_t *setup_conf(mddev_t *mddev)
{
raid5_conf_t *conf;
int raid_disk, memory;
mdk_rdev_t *rdev;
struct disk_info *disk;
- int working_disks = 0;
- if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) {
+ if (mddev->new_level != 5
+ && mddev->new_level != 4
+ && mddev->new_level != 6) {
printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
- mdname(mddev), mddev->level);
- return -EIO;
+ mdname(mddev), mddev->new_level);
+ return ERR_PTR(-EIO);
}
-
- if (mddev->chunk_size < PAGE_SIZE) {
- printk(KERN_ERR "md/raid5: chunk_size must be at least "
- "PAGE_SIZE but %d < %ld\n",
- mddev->chunk_size, PAGE_SIZE);
- return -EINVAL;
+ if ((mddev->new_level == 5
+ && !algorithm_valid_raid5(mddev->new_layout)) ||
+ (mddev->new_level == 6
+ && !algorithm_valid_raid6(mddev->new_layout))) {
+ printk(KERN_ERR "raid5: %s: layout %d not supported\n",
+ mdname(mddev), mddev->new_layout);
+ return ERR_PTR(-EIO);
}
-
- if (mddev->reshape_position != MaxSector) {
- /* Check that we can continue the reshape.
- * Currently only disks can change, it must
- * increase, and we must be past the point where
- * a stripe over-writes itself
- */
- sector_t here_new, here_old;
- int old_disks;
- int max_degraded = (mddev->level == 5 ? 1 : 2);
-
- if (mddev->new_level != mddev->level ||
- mddev->new_layout != mddev->layout ||
- mddev->new_chunk != mddev->chunk_size) {
- printk(KERN_ERR "raid5: %s: unsupported reshape "
- "required - aborting.\n",
- mdname(mddev));
- return -EINVAL;
- }
- if (mddev->delta_disks <= 0) {
- printk(KERN_ERR "raid5: %s: unsupported reshape "
- "(reduce disks) required - aborting.\n",
- mdname(mddev));
- return -EINVAL;
- }
- old_disks = mddev->raid_disks - mddev->delta_disks;
- /* reshape_position must be on a new-stripe boundary, and one
- * further up in new geometry must map after here in old
- * geometry.
- */
- here_new = mddev->reshape_position;
- if (sector_div(here_new, (mddev->chunk_size>>9)*
- (mddev->raid_disks - max_degraded))) {
- printk(KERN_ERR "raid5: reshape_position not "
- "on a stripe boundary\n");
- return -EINVAL;
- }
- /* here_new is the stripe we will write to */
- here_old = mddev->reshape_position;
- sector_div(here_old, (mddev->chunk_size>>9)*
- (old_disks-max_degraded));
- /* here_old is the first stripe that we might need to read
- * from */
- if (here_new >= here_old) {
- /* Reading from the same stripe as writing to - bad */
- printk(KERN_ERR "raid5: reshape_position too early for "
- "auto-recovery - aborting.\n");
- return -EINVAL;
- }
- printk(KERN_INFO "raid5: reshape will continue\n");
- /* OK, we should be able to continue; */
+ if (mddev->new_level == 6 && mddev->raid_disks < 4) {
+ printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
+ mdname(mddev), mddev->raid_disks);
+ return ERR_PTR(-EINVAL);
}
+ if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
+ printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
+ mddev->new_chunk, mdname(mddev));
+ return ERR_PTR(-EINVAL);
+ }
- mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL);
- if ((conf = mddev->private) == NULL)
+ conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
+ if (conf == NULL)
goto abort;
- if (mddev->reshape_position == MaxSector) {
- conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks;
- } else {
- conf->raid_disks = mddev->raid_disks;
+
+ conf->raid_disks = mddev->raid_disks;
+ if (mddev->reshape_position == MaxSector)
+ conf->previous_raid_disks = mddev->raid_disks;
+ else
conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
- }
conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
GFP_KERNEL);
@@ -4086,13 +4357,12 @@ static int run(mddev_t *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
- if (mddev->level == 6) {
+ if (mddev->new_level == 6) {
conf->spare_page = alloc_page(GFP_KERNEL);
if (!conf->spare_page)
goto abort;
}
spin_lock_init(&conf->device_lock);
- mddev->queue->queue_lock = &conf->device_lock;
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list);
@@ -4121,47 +4391,134 @@ static int run(mddev_t *mddev)
printk(KERN_INFO "raid5: device %s operational as raid"
" disk %d\n", bdevname(rdev->bdev,b),
raid_disk);
- working_disks++;
} else
/* Cannot rely on bitmap to complete recovery */
conf->fullsync = 1;
}
- /*
- * 0 for a fully functional array, 1 or 2 for a degraded array.
- */
- mddev->degraded = conf->raid_disks - working_disks;
- conf->mddev = mddev;
- conf->chunk_size = mddev->chunk_size;
- conf->level = mddev->level;
+ conf->chunk_size = mddev->new_chunk;
+ conf->level = mddev->new_level;
if (conf->level == 6)
conf->max_degraded = 2;
else
conf->max_degraded = 1;
- conf->algorithm = mddev->layout;
+ conf->algorithm = mddev->new_layout;
conf->max_nr_stripes = NR_STRIPES;
- conf->expand_progress = mddev->reshape_position;
-
- /* device size must be a multiple of chunk size */
- mddev->size &= ~(mddev->chunk_size/1024 -1);
- mddev->resync_max_sectors = mddev->size << 1;
+ conf->reshape_progress = mddev->reshape_position;
+ if (conf->reshape_progress != MaxSector) {
+ conf->prev_chunk = mddev->chunk_size;
+ conf->prev_algo = mddev->layout;
+ }
- if (conf->level == 6 && conf->raid_disks < 4) {
- printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
- mdname(mddev), conf->raid_disks);
+ memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+ conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
+ if (grow_stripes(conf, conf->max_nr_stripes)) {
+ printk(KERN_ERR
+ "raid5: couldn't allocate %dkB for buffers\n", memory);
goto abort;
- }
- if (!conf->chunk_size || conf->chunk_size % 4) {
- printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
- conf->chunk_size, mdname(mddev));
+ } else
+ printk(KERN_INFO "raid5: allocated %dkB for %s\n",
+ memory, mdname(mddev));
+
+ conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+ if (!conf->thread) {
+ printk(KERN_ERR
+ "raid5: couldn't allocate thread for %s\n",
+ mdname(mddev));
goto abort;
}
- if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
- printk(KERN_ERR
- "raid5: unsupported parity algorithm %d for %s\n",
- conf->algorithm, mdname(mddev));
- goto abort;
+
+ return conf;
+
+ abort:
+ if (conf) {
+ shrink_stripes(conf);
+ safe_put_page(conf->spare_page);
+ kfree(conf->disks);
+ kfree(conf->stripe_hashtbl);
+ kfree(conf);
+ return ERR_PTR(-EIO);
+ } else
+ return ERR_PTR(-ENOMEM);
+}
+
+static int run(mddev_t *mddev)
+{
+ raid5_conf_t *conf;
+ int working_disks = 0;
+ mdk_rdev_t *rdev;
+
+ if (mddev->reshape_position != MaxSector) {
+ /* Check that we can continue the reshape.
+ * Currently only disks can change, it must
+ * increase, and we must be past the point where
+ * a stripe over-writes itself
+ */
+ sector_t here_new, here_old;
+ int old_disks;
+ int max_degraded = (mddev->level == 6 ? 2 : 1);
+
+ if (mddev->new_level != mddev->level) {
+ printk(KERN_ERR "raid5: %s: unsupported reshape "
+ "required - aborting.\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+ old_disks = mddev->raid_disks - mddev->delta_disks;
+ /* reshape_position must be on a new-stripe boundary, and one
+ * further up in new geometry must map after here in old
+ * geometry.
+ */
+ here_new = mddev->reshape_position;
+ if (sector_div(here_new, (mddev->new_chunk>>9)*
+ (mddev->raid_disks - max_degraded))) {
+ printk(KERN_ERR "raid5: reshape_position not "
+ "on a stripe boundary\n");
+ return -EINVAL;
+ }
+ /* here_new is the stripe we will write to */
+ here_old = mddev->reshape_position;
+ sector_div(here_old, (mddev->chunk_size>>9)*
+ (old_disks-max_degraded));
+ /* here_old is the first stripe that we might need to read
+ * from */
+ if (here_new >= here_old) {
+ /* Reading from the same stripe as writing to - bad */
+ printk(KERN_ERR "raid5: reshape_position too early for "
+ "auto-recovery - aborting.\n");
+ return -EINVAL;
+ }
+ printk(KERN_INFO "raid5: reshape will continue\n");
+ /* OK, we should be able to continue; */
+ } else {
+ BUG_ON(mddev->level != mddev->new_level);
+ BUG_ON(mddev->layout != mddev->new_layout);
+ BUG_ON(mddev->chunk_size != mddev->new_chunk);
+ BUG_ON(mddev->delta_disks != 0);
}
+
+ if (mddev->private == NULL)
+ conf = setup_conf(mddev);
+ else
+ conf = mddev->private;
+
+ if (IS_ERR(conf))
+ return PTR_ERR(conf);
+
+ mddev->thread = conf->thread;
+ conf->thread = NULL;
+ mddev->private = conf;
+
+ /*
+ * 0 for a fully functional array, 1 or 2 for a degraded array.
+ */
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= 0 &&
+ test_bit(In_sync, &rdev->flags))
+ working_disks++;
+
+ mddev->degraded = conf->raid_disks - working_disks;
+
if (mddev->degraded > conf->max_degraded) {
printk(KERN_ERR "raid5: not enough operational devices for %s"
" (%d/%d failed)\n",
@@ -4169,6 +4526,10 @@ static int run(mddev_t *mddev)
goto abort;
}
+ /* device size must be a multiple of chunk size */
+ mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
+ mddev->resync_max_sectors = mddev->dev_sectors;
+
if (mddev->degraded > 0 &&
mddev->recovery_cp != MaxSector) {
if (mddev->ok_start_degraded)
@@ -4184,43 +4545,22 @@ static int run(mddev_t *mddev)
}
}
- {
- mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
- if (!mddev->thread) {
- printk(KERN_ERR
- "raid5: couldn't allocate thread for %s\n",
- mdname(mddev));
- goto abort;
- }
- }
- memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
- conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
- if (grow_stripes(conf, conf->max_nr_stripes)) {
- printk(KERN_ERR
- "raid5: couldn't allocate %dkB for buffers\n", memory);
- shrink_stripes(conf);
- md_unregister_thread(mddev->thread);
- goto abort;
- } else
- printk(KERN_INFO "raid5: allocated %dkB for %s\n",
- memory, mdname(mddev));
-
if (mddev->degraded == 0)
printk("raid5: raid level %d set %s active with %d out of %d"
- " devices, algorithm %d\n", conf->level, mdname(mddev),
- mddev->raid_disks-mddev->degraded, mddev->raid_disks,
- conf->algorithm);
+ " devices, algorithm %d\n", conf->level, mdname(mddev),
+ mddev->raid_disks-mddev->degraded, mddev->raid_disks,
+ mddev->new_layout);
else
printk(KERN_ALERT "raid5: raid level %d set %s active with %d"
" out of %d devices, algorithm %d\n", conf->level,
mdname(mddev), mddev->raid_disks - mddev->degraded,
- mddev->raid_disks, conf->algorithm);
+ mddev->raid_disks, mddev->new_layout);
print_raid5_conf(conf);
- if (conf->expand_progress != MaxSector) {
+ if (conf->reshape_progress != MaxSector) {
printk("...ok start reshape thread\n");
- conf->expand_lo = conf->expand_progress;
+ conf->reshape_safe = conf->reshape_progress;
atomic_set(&conf->reshape_stripes, 0);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
@@ -4247,18 +4587,22 @@ static int run(mddev_t *mddev)
"raid5: failed to create sysfs attributes for %s\n",
mdname(mddev));
+ mddev->queue->queue_lock = &conf->device_lock;
+
mddev->queue->unplug_fn = raid5_unplug_device;
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
- mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks -
- conf->max_degraded);
+ md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
return 0;
abort:
+ md_unregister_thread(mddev->thread);
+ mddev->thread = NULL;
if (conf) {
+ shrink_stripes(conf);
print_raid5_conf(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
@@ -4396,6 +4740,10 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
print_raid5_conf(conf);
rdev = p->rdev;
if (rdev) {
+ if (number >= conf->raid_disks &&
+ conf->reshape_progress == MaxSector)
+ clear_bit(In_sync, &rdev->flags);
+
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
err = -EBUSY;
@@ -4405,7 +4753,8 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
* isn't possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
- mddev->degraded <= conf->max_degraded) {
+ mddev->degraded <= conf->max_degraded &&
+ number < conf->raid_disks) {
err = -EBUSY;
goto abort;
}
@@ -4472,36 +4821,48 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
- raid5_conf_t *conf = mddev_to_conf(mddev);
-
sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
- mddev->array_sectors = sectors * (mddev->raid_disks
- - conf->max_degraded);
+ md_set_array_sectors(mddev, raid5_size(mddev, sectors,
+ mddev->raid_disks));
+ if (mddev->array_sectors >
+ raid5_size(mddev, sectors, mddev->raid_disks))
+ return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
- if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
- mddev->recovery_cp = mddev->size << 1;
+ if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
+ mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
- mddev->size = sectors /2;
+ mddev->dev_sectors = sectors;
mddev->resync_max_sectors = sectors;
return 0;
}
-#ifdef CONFIG_MD_RAID5_RESHAPE
static int raid5_check_reshape(mddev_t *mddev)
{
raid5_conf_t *conf = mddev_to_conf(mddev);
- int err;
- if (mddev->delta_disks < 0 ||
- mddev->new_level != mddev->level)
- return -EINVAL; /* Cannot shrink array or change level yet */
- if (mddev->delta_disks == 0)
- return 0; /* nothing to do */
+ if (mddev->delta_disks == 0 &&
+ mddev->new_layout == mddev->layout &&
+ mddev->new_chunk == mddev->chunk_size)
+ return -EINVAL; /* nothing to do */
if (mddev->bitmap)
/* Cannot grow a bitmap yet */
return -EBUSY;
+ if (mddev->degraded > conf->max_degraded)
+ return -EINVAL;
+ if (mddev->delta_disks < 0) {
+ /* We might be able to shrink, but the devices must
+ * be made bigger first.
+ * For raid6, 4 is the minimum size.
+ * Otherwise 2 is the minimum
+ */
+ int min = 2;
+ if (mddev->level == 6)
+ min = 4;
+ if (mddev->raid_disks + mddev->delta_disks < min)
+ return -EINVAL;
+ }
/* Can only proceed if there are plenty of stripe_heads.
* We need a minimum of one full stripe,, and for sensible progress
@@ -4514,18 +4875,12 @@ static int raid5_check_reshape(mddev_t *mddev)
if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
(mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
- (mddev->chunk_size / STRIPE_SIZE)*4);
+ (max(mddev->chunk_size, mddev->new_chunk)
+ / STRIPE_SIZE)*4);
return -ENOSPC;
}
- err = resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
- if (err)
- return err;
-
- if (mddev->degraded > conf->max_degraded)
- return -EINVAL;
- /* looks like we might be able to manage this */
- return 0;
+ return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
}
static int raid5_start_reshape(mddev_t *mddev)
@@ -4550,12 +4905,31 @@ static int raid5_start_reshape(mddev_t *mddev)
*/
return -EINVAL;
+ /* Refuse to reduce size of the array. Any reductions in
+ * array size must be through explicit setting of array_size
+ * attribute.
+ */
+ if (raid5_size(mddev, 0, conf->raid_disks + mddev->delta_disks)
+ < mddev->array_sectors) {
+ printk(KERN_ERR "md: %s: array size must be reduced "
+ "before number of disks\n", mdname(mddev));
+ return -EINVAL;
+ }
+
atomic_set(&conf->reshape_stripes, 0);
spin_lock_irq(&conf->device_lock);
conf->previous_raid_disks = conf->raid_disks;
conf->raid_disks += mddev->delta_disks;
- conf->expand_progress = 0;
- conf->expand_lo = 0;
+ conf->prev_chunk = conf->chunk_size;
+ conf->chunk_size = mddev->new_chunk;
+ conf->prev_algo = conf->algorithm;
+ conf->algorithm = mddev->new_layout;
+ if (mddev->delta_disks < 0)
+ conf->reshape_progress = raid5_size(mddev, 0, 0);
+ else
+ conf->reshape_progress = 0;
+ conf->reshape_safe = conf->reshape_progress;
+ conf->generation++;
spin_unlock_irq(&conf->device_lock);
/* Add some new drives, as many as will fit.
@@ -4580,9 +4954,12 @@ static int raid5_start_reshape(mddev_t *mddev)
break;
}
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (mddev->delta_disks > 0) {
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded = (conf->raid_disks - conf->previous_raid_disks)
+ - added_devices;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ }
mddev->raid_disks = conf->raid_disks;
mddev->reshape_position = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -4597,52 +4974,86 @@ static int raid5_start_reshape(mddev_t *mddev)
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
- conf->expand_progress = MaxSector;
+ conf->reshape_progress = MaxSector;
spin_unlock_irq(&conf->device_lock);
return -EAGAIN;
}
+ conf->reshape_checkpoint = jiffies;
md_wakeup_thread(mddev->sync_thread);
md_new_event(mddev);
return 0;
}
-#endif
+/* This is called from the reshape thread and should make any
+ * changes needed in 'conf'
+ */
static void end_reshape(raid5_conf_t *conf)
{
- struct block_device *bdev;
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
- conf->mddev->array_sectors = 2 * conf->mddev->size *
- (conf->raid_disks - conf->max_degraded);
- set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors);
- conf->mddev->changed = 1;
-
- bdev = bdget_disk(conf->mddev->gendisk, 0);
- if (bdev) {
- mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode,
- (loff_t)conf->mddev->array_sectors << 9);
- mutex_unlock(&bdev->bd_inode->i_mutex);
- bdput(bdev);
- }
+
spin_lock_irq(&conf->device_lock);
- conf->expand_progress = MaxSector;
+ conf->previous_raid_disks = conf->raid_disks;
+ conf->reshape_progress = MaxSector;
spin_unlock_irq(&conf->device_lock);
- conf->mddev->reshape_position = MaxSector;
+ wake_up(&conf->wait_for_overlap);
/* read-ahead size must cover two whole stripes, which is
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices
*/
{
- int data_disks = conf->previous_raid_disks - conf->max_degraded;
- int stripe = data_disks *
- (conf->mddev->chunk_size / PAGE_SIZE);
+ int data_disks = conf->raid_disks - conf->max_degraded;
+ int stripe = data_disks * (conf->chunk_size
+ / PAGE_SIZE);
if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
}
}
}
+/* This is called from the raid5d thread with mddev_lock held.
+ * It makes config changes to the device.
+ */
+static void raid5_finish_reshape(mddev_t *mddev)
+{
+ struct block_device *bdev;
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+
+ if (mddev->delta_disks > 0) {
+ md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ mddev->changed = 1;
+
+ bdev = bdget_disk(mddev->gendisk, 0);
+ if (bdev) {
+ mutex_lock(&bdev->bd_inode->i_mutex);
+ i_size_write(bdev->bd_inode,
+ (loff_t)mddev->array_sectors << 9);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
+ bdput(bdev);
+ }
+ } else {
+ int d;
+ mddev->degraded = conf->raid_disks;
+ for (d = 0; d < conf->raid_disks ; d++)
+ if (conf->disks[d].rdev &&
+ test_bit(In_sync,
+ &conf->disks[d].rdev->flags))
+ mddev->degraded--;
+ for (d = conf->raid_disks ;
+ d < conf->raid_disks - mddev->delta_disks;
+ d++)
+ raid5_remove_disk(mddev, d);
+ }
+ mddev->layout = conf->algorithm;
+ mddev->chunk_size = conf->chunk_size;
+ mddev->reshape_position = MaxSector;
+ mddev->delta_disks = 0;
+ }
+}
+
static void raid5_quiesce(mddev_t *mddev, int state)
{
raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -4672,6 +5083,212 @@ static void raid5_quiesce(mddev_t *mddev, int state)
}
}
+
+static void *raid5_takeover_raid1(mddev_t *mddev)
+{
+ int chunksect;
+
+ if (mddev->raid_disks != 2 ||
+ mddev->degraded > 1)
+ return ERR_PTR(-EINVAL);
+
+ /* Should check if there are write-behind devices? */
+
+ chunksect = 64*2; /* 64K by default */
+
+ /* The array must be an exact multiple of chunksize */
+ while (chunksect && (mddev->array_sectors & (chunksect-1)))
+ chunksect >>= 1;
+
+ if ((chunksect<<9) < STRIPE_SIZE)
+ /* array size does not allow a suitable chunk size */
+ return ERR_PTR(-EINVAL);
+
+ mddev->new_level = 5;
+ mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
+ mddev->new_chunk = chunksect << 9;
+
+ return setup_conf(mddev);
+}
+
+static void *raid5_takeover_raid6(mddev_t *mddev)
+{
+ int new_layout;
+
+ switch (mddev->layout) {
+ case ALGORITHM_LEFT_ASYMMETRIC_6:
+ new_layout = ALGORITHM_LEFT_ASYMMETRIC;
+ break;
+ case ALGORITHM_RIGHT_ASYMMETRIC_6:
+ new_layout = ALGORITHM_RIGHT_ASYMMETRIC;
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC_6:
+ new_layout = ALGORITHM_LEFT_SYMMETRIC;
+ break;
+ case ALGORITHM_RIGHT_SYMMETRIC_6:
+ new_layout = ALGORITHM_RIGHT_SYMMETRIC;
+ break;
+ case ALGORITHM_PARITY_0_6:
+ new_layout = ALGORITHM_PARITY_0;
+ break;
+ case ALGORITHM_PARITY_N:
+ new_layout = ALGORITHM_PARITY_N;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ mddev->new_level = 5;
+ mddev->new_layout = new_layout;
+ mddev->delta_disks = -1;
+ mddev->raid_disks -= 1;
+ return setup_conf(mddev);
+}
+
+
+static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+{
+ /* For a 2-drive array, the layout and chunk size can be changed
+ * immediately as not restriping is needed.
+ * For larger arrays we record the new value - after validation
+ * to be used by a reshape pass.
+ */
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+
+ if (new_layout >= 0 && !algorithm_valid_raid5(new_layout))
+ return -EINVAL;
+ if (new_chunk > 0) {
+ if (new_chunk & (new_chunk-1))
+ /* not a power of 2 */
+ return -EINVAL;
+ if (new_chunk < PAGE_SIZE)
+ return -EINVAL;
+ if (mddev->array_sectors & ((new_chunk>>9)-1))
+ /* not factor of array size */
+ return -EINVAL;
+ }
+
+ /* They look valid */
+
+ if (mddev->raid_disks == 2) {
+
+ if (new_layout >= 0) {
+ conf->algorithm = new_layout;
+ mddev->layout = mddev->new_layout = new_layout;
+ }
+ if (new_chunk > 0) {
+ conf->chunk_size = new_chunk;
+ mddev->chunk_size = mddev->new_chunk = new_chunk;
+ }
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ md_wakeup_thread(mddev->thread);
+ } else {
+ if (new_layout >= 0)
+ mddev->new_layout = new_layout;
+ if (new_chunk > 0)
+ mddev->new_chunk = new_chunk;
+ }
+ return 0;
+}
+
+static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
+{
+ if (new_layout >= 0 && !algorithm_valid_raid6(new_layout))
+ return -EINVAL;
+ if (new_chunk > 0) {
+ if (new_chunk & (new_chunk-1))
+ /* not a power of 2 */
+ return -EINVAL;
+ if (new_chunk < PAGE_SIZE)
+ return -EINVAL;
+ if (mddev->array_sectors & ((new_chunk>>9)-1))
+ /* not factor of array size */
+ return -EINVAL;
+ }
+
+ /* They look valid */
+
+ if (new_layout >= 0)
+ mddev->new_layout = new_layout;
+ if (new_chunk > 0)
+ mddev->new_chunk = new_chunk;
+
+ return 0;
+}
+
+static void *raid5_takeover(mddev_t *mddev)
+{
+ /* raid5 can take over:
+ * raid0 - if all devices are the same - make it a raid4 layout
+ * raid1 - if there are two drives. We need to know the chunk size
+ * raid4 - trivial - just use a raid4 layout.
+ * raid6 - Providing it is a *_6 layout
+ *
+ * For now, just do raid1
+ */
+
+ if (mddev->level == 1)
+ return raid5_takeover_raid1(mddev);
+ if (mddev->level == 4) {
+ mddev->new_layout = ALGORITHM_PARITY_N;
+ mddev->new_level = 5;
+ return setup_conf(mddev);
+ }
+ if (mddev->level == 6)
+ return raid5_takeover_raid6(mddev);
+
+ return ERR_PTR(-EINVAL);
+}
+
+
+static struct mdk_personality raid5_personality;
+
+static void *raid6_takeover(mddev_t *mddev)
+{
+ /* Currently can only take over a raid5. We map the
+ * personality to an equivalent raid6 personality
+ * with the Q block at the end.
+ */
+ int new_layout;
+
+ if (mddev->pers != &raid5_personality)
+ return ERR_PTR(-EINVAL);
+ if (mddev->degraded > 1)
+ return ERR_PTR(-EINVAL);
+ if (mddev->raid_disks > 253)
+ return ERR_PTR(-EINVAL);
+ if (mddev->raid_disks < 3)
+ return ERR_PTR(-EINVAL);
+
+ switch (mddev->layout) {
+ case ALGORITHM_LEFT_ASYMMETRIC:
+ new_layout = ALGORITHM_LEFT_ASYMMETRIC_6;
+ break;
+ case ALGORITHM_RIGHT_ASYMMETRIC:
+ new_layout = ALGORITHM_RIGHT_ASYMMETRIC_6;
+ break;
+ case ALGORITHM_LEFT_SYMMETRIC:
+ new_layout = ALGORITHM_LEFT_SYMMETRIC_6;
+ break;
+ case ALGORITHM_RIGHT_SYMMETRIC:
+ new_layout = ALGORITHM_RIGHT_SYMMETRIC_6;
+ break;
+ case ALGORITHM_PARITY_0:
+ new_layout = ALGORITHM_PARITY_0_6;
+ break;
+ case ALGORITHM_PARITY_N:
+ new_layout = ALGORITHM_PARITY_N;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ mddev->new_level = 6;
+ mddev->new_layout = new_layout;
+ mddev->delta_disks = 1;
+ mddev->raid_disks += 1;
+ return setup_conf(mddev);
+}
+
+
static struct mdk_personality raid6_personality =
{
.name = "raid6",
@@ -4687,11 +5304,13 @@ static struct mdk_personality raid6_personality =
.spare_active = raid5_spare_active,
.sync_request = sync_request,
.resize = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+ .size = raid5_size,
.check_reshape = raid5_check_reshape,
.start_reshape = raid5_start_reshape,
-#endif
+ .finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
+ .takeover = raid6_takeover,
+ .reconfig = raid6_reconfig,
};
static struct mdk_personality raid5_personality =
{
@@ -4708,11 +5327,13 @@ static struct mdk_personality raid5_personality =
.spare_active = raid5_spare_active,
.sync_request = sync_request,
.resize = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+ .size = raid5_size,
.check_reshape = raid5_check_reshape,
.start_reshape = raid5_start_reshape,
-#endif
+ .finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
+ .takeover = raid5_takeover,
+ .reconfig = raid5_reconfig,
};
static struct mdk_personality raid4_personality =
@@ -4730,20 +5351,15 @@ static struct mdk_personality raid4_personality =
.spare_active = raid5_spare_active,
.sync_request = sync_request,
.resize = raid5_resize,
-#ifdef CONFIG_MD_RAID5_RESHAPE
+ .size = raid5_size,
.check_reshape = raid5_check_reshape,
.start_reshape = raid5_start_reshape,
-#endif
+ .finish_reshape = raid5_finish_reshape,
.quiesce = raid5_quiesce,
};
static int __init raid5_init(void)
{
- int e;
-
- e = raid6_select_algo();
- if ( e )
- return e;
register_md_personality(&raid6_personality);
register_md_personality(&raid5_personality);
register_md_personality(&raid4_personality);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
new file mode 100644
index 00000000000..52ba99954de
--- /dev/null
+++ b/drivers/md/raid5.h
@@ -0,0 +1,474 @@
+#ifndef _RAID5_H
+#define _RAID5_H
+
+#include <linux/raid/xor.h>
+
+/*
+ *
+ * Each stripe contains one buffer per disc. Each buffer can be in
+ * one of a number of states stored in "flags". Changes between
+ * these states happen *almost* exclusively under a per-stripe
+ * spinlock. Some very specific changes can happen in bi_end_io, and
+ * these are not protected by the spin lock.
+ *
+ * The flag bits that are used to represent these states are:
+ * R5_UPTODATE and R5_LOCKED
+ *
+ * State Empty == !UPTODATE, !LOCK
+ * We have no data, and there is no active request
+ * State Want == !UPTODATE, LOCK
+ * A read request is being submitted for this block
+ * State Dirty == UPTODATE, LOCK
+ * Some new data is in this buffer, and it is being written out
+ * State Clean == UPTODATE, !LOCK
+ * We have valid data which is the same as on disc
+ *
+ * The possible state transitions are:
+ *
+ * Empty -> Want - on read or write to get old data for parity calc
+ * Empty -> Dirty - on compute_parity to satisfy write/sync request.(RECONSTRUCT_WRITE)
+ * Empty -> Clean - on compute_block when computing a block for failed drive
+ * Want -> Empty - on failed read
+ * Want -> Clean - on successful completion of read request
+ * Dirty -> Clean - on successful completion of write request
+ * Dirty -> Clean - on failed write
+ * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW)
+ *
+ * The Want->Empty, Want->Clean, Dirty->Clean, transitions
+ * all happen in b_end_io at interrupt time.
+ * Each sets the Uptodate bit before releasing the Lock bit.
+ * This leaves one multi-stage transition:
+ * Want->Dirty->Clean
+ * This is safe because thinking that a Clean buffer is actually dirty
+ * will at worst delay some action, and the stripe will be scheduled
+ * for attention after the transition is complete.
+ *
+ * There is one possibility that is not covered by these states. That
+ * is if one drive has failed and there is a spare being rebuilt. We
+ * can't distinguish between a clean block that has been generated
+ * from parity calculations, and a clean block that has been
+ * successfully written to the spare ( or to parity when resyncing).
+ * To distingush these states we have a stripe bit STRIPE_INSYNC that
+ * is set whenever a write is scheduled to the spare, or to the parity
+ * disc if there is no spare. A sync request clears this bit, and
+ * when we find it set with no buffers locked, we know the sync is
+ * complete.
+ *
+ * Buffers for the md device that arrive via make_request are attached
+ * to the appropriate stripe in one of two lists linked on b_reqnext.
+ * One list (bh_read) for read requests, one (bh_write) for write.
+ * There should never be more than one buffer on the two lists
+ * together, but we are not guaranteed of that so we allow for more.
+ *
+ * If a buffer is on the read list when the associated cache buffer is
+ * Uptodate, the data is copied into the read buffer and it's b_end_io
+ * routine is called. This may happen in the end_request routine only
+ * if the buffer has just successfully been read. end_request should
+ * remove the buffers from the list and then set the Uptodate bit on
+ * the buffer. Other threads may do this only if they first check
+ * that the Uptodate bit is set. Once they have checked that they may
+ * take buffers off the read queue.
+ *
+ * When a buffer on the write list is committed for write it is copied
+ * into the cache buffer, which is then marked dirty, and moved onto a
+ * third list, the written list (bh_written). Once both the parity
+ * block and the cached buffer are successfully written, any buffer on
+ * a written list can be returned with b_end_io.
+ *
+ * The write list and read list both act as fifos. The read list is
+ * protected by the device_lock. The write and written lists are
+ * protected by the stripe lock. The device_lock, which can be
+ * claimed while the stipe lock is held, is only for list
+ * manipulations and will only be held for a very short time. It can
+ * be claimed from interrupts.
+ *
+ *
+ * Stripes in the stripe cache can be on one of two lists (or on
+ * neither). The "inactive_list" contains stripes which are not
+ * currently being used for any request. They can freely be reused
+ * for another stripe. The "handle_list" contains stripes that need
+ * to be handled in some way. Both of these are fifo queues. Each
+ * stripe is also (potentially) linked to a hash bucket in the hash
+ * table so that it can be found by sector number. Stripes that are
+ * not hashed must be on the inactive_list, and will normally be at
+ * the front. All stripes start life this way.
+ *
+ * The inactive_list, handle_list and hash bucket lists are all protected by the
+ * device_lock.
+ * - stripes on the inactive_list never have their stripe_lock held.
+ * - stripes have a reference counter. If count==0, they are on a list.
+ * - If a stripe might need handling, STRIPE_HANDLE is set.
+ * - When refcount reaches zero, then if STRIPE_HANDLE it is put on
+ * handle_list else inactive_list
+ *
+ * This, combined with the fact that STRIPE_HANDLE is only ever
+ * cleared while a stripe has a non-zero count means that if the
+ * refcount is 0 and STRIPE_HANDLE is set, then it is on the
+ * handle_list and if recount is 0 and STRIPE_HANDLE is not set, then
+ * the stripe is on inactive_list.
+ *
+ * The possible transitions are:
+ * activate an unhashed/inactive stripe (get_active_stripe())
+ * lockdev check-hash unlink-stripe cnt++ clean-stripe hash-stripe unlockdev
+ * activate a hashed, possibly active stripe (get_active_stripe())
+ * lockdev check-hash if(!cnt++)unlink-stripe unlockdev
+ * attach a request to an active stripe (add_stripe_bh())
+ * lockdev attach-buffer unlockdev
+ * handle a stripe (handle_stripe())
+ * lockstripe clrSTRIPE_HANDLE ...
+ * (lockdev check-buffers unlockdev) ..
+ * change-state ..
+ * record io/ops needed unlockstripe schedule io/ops
+ * release an active stripe (release_stripe())
+ * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
+ *
+ * The refcount counts each thread that have activated the stripe,
+ * plus raid5d if it is handling it, plus one for each active request
+ * on a cached buffer, and plus one if the stripe is undergoing stripe
+ * operations.
+ *
+ * Stripe operations are performed outside the stripe lock,
+ * the stripe operations are:
+ * -copying data between the stripe cache and user application buffers
+ * -computing blocks to save a disk access, or to recover a missing block
+ * -updating the parity on a write operation (reconstruct write and
+ * read-modify-write)
+ * -checking parity correctness
+ * -running i/o to disk
+ * These operations are carried out by raid5_run_ops which uses the async_tx
+ * api to (optionally) offload operations to dedicated hardware engines.
+ * When requesting an operation handle_stripe sets the pending bit for the
+ * operation and increments the count. raid5_run_ops is then run whenever
+ * the count is non-zero.
+ * There are some critical dependencies between the operations that prevent some
+ * from being requested while another is in flight.
+ * 1/ Parity check operations destroy the in cache version of the parity block,
+ * so we prevent parity dependent operations like writes and compute_blocks
+ * from starting while a check is in progress. Some dma engines can perform
+ * the check without damaging the parity block, in these cases the parity
+ * block is re-marked up to date (assuming the check was successful) and is
+ * not re-read from disk.
+ * 2/ When a write operation is requested we immediately lock the affected
+ * blocks, and mark them as not up to date. This causes new read requests
+ * to be held off, as well as parity checks and compute block operations.
+ * 3/ Once a compute block operation has been requested handle_stripe treats
+ * that block as if it is up to date. raid5_run_ops guaruntees that any
+ * operation that is dependent on the compute block result is initiated after
+ * the compute block completes.
+ */
+
+/*
+ * Operations state - intermediate states that are visible outside of sh->lock
+ * In general _idle indicates nothing is running, _run indicates a data
+ * processing operation is active, and _result means the data processing result
+ * is stable and can be acted upon. For simple operations like biofill and
+ * compute that only have an _idle and _run state they are indicated with
+ * sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN)
+ */
+/**
+ * enum check_states - handles syncing / repairing a stripe
+ * @check_state_idle - check operations are quiesced
+ * @check_state_run - check operation is running
+ * @check_state_result - set outside lock when check result is valid
+ * @check_state_compute_run - check failed and we are repairing
+ * @check_state_compute_result - set outside lock when compute result is valid
+ */
+enum check_states {
+ check_state_idle = 0,
+ check_state_run, /* parity check */
+ check_state_check_result,
+ check_state_compute_run, /* parity repair */
+ check_state_compute_result,
+};
+
+/**
+ * enum reconstruct_states - handles writing or expanding a stripe
+ */
+enum reconstruct_states {
+ reconstruct_state_idle = 0,
+ reconstruct_state_prexor_drain_run, /* prexor-write */
+ reconstruct_state_drain_run, /* write */
+ reconstruct_state_run, /* expand */
+ reconstruct_state_prexor_drain_result,
+ reconstruct_state_drain_result,
+ reconstruct_state_result,
+};
+
+struct stripe_head {
+ struct hlist_node hash;
+ struct list_head lru; /* inactive_list or handle_list */
+ struct raid5_private_data *raid_conf;
+ short generation; /* increments with every
+ * reshape */
+ sector_t sector; /* sector of this row */
+ short pd_idx; /* parity disk index */
+ short qd_idx; /* 'Q' disk index for raid6 */
+ short ddf_layout;/* use DDF ordering to calculate Q */
+ unsigned long state; /* state flags */
+ atomic_t count; /* nr of active thread/requests */
+ spinlock_t lock;
+ int bm_seq; /* sequence number for bitmap flushes */
+ int disks; /* disks in stripe */
+ enum check_states check_state;
+ enum reconstruct_states reconstruct_state;
+ /* stripe_operations
+ * @target - STRIPE_OP_COMPUTE_BLK target
+ */
+ struct stripe_operations {
+ int target;
+ u32 zero_sum_result;
+ } ops;
+ struct r5dev {
+ struct bio req;
+ struct bio_vec vec;
+ struct page *page;
+ struct bio *toread, *read, *towrite, *written;
+ sector_t sector; /* sector of this page */
+ unsigned long flags;
+ } dev[1]; /* allocated with extra space depending of RAID geometry */
+};
+
+/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
+ * for handle_stripe. It is only valid under spin_lock(sh->lock);
+ */
+struct stripe_head_state {
+ int syncing, expanding, expanded;
+ int locked, uptodate, to_read, to_write, failed, written;
+ int to_fill, compute, req_compute, non_overwrite;
+ int failed_num;
+ unsigned long ops_request;
+};
+
+/* r6_state - extra state data only relevant to r6 */
+struct r6_state {
+ int p_failed, q_failed, failed_num[2];
+};
+
+/* Flags */
+#define R5_UPTODATE 0 /* page contains current data */
+#define R5_LOCKED 1 /* IO has been submitted on "req" */
+#define R5_OVERWRITE 2 /* towrite covers whole page */
+/* and some that are internal to handle_stripe */
+#define R5_Insync 3 /* rdev && rdev->in_sync at start */
+#define R5_Wantread 4 /* want to schedule a read */
+#define R5_Wantwrite 5
+#define R5_Overlap 7 /* There is a pending overlapping request on this block */
+#define R5_ReadError 8 /* seen a read error here recently */
+#define R5_ReWrite 9 /* have tried to over-write the readerror */
+
+#define R5_Expanded 10 /* This block now has post-expand data */
+#define R5_Wantcompute 11 /* compute_block in progress treat as
+ * uptodate
+ */
+#define R5_Wantfill 12 /* dev->toread contains a bio that needs
+ * filling
+ */
+#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
+/*
+ * Write method
+ */
+#define RECONSTRUCT_WRITE 1
+#define READ_MODIFY_WRITE 2
+/* not a write method, but a compute_parity mode */
+#define CHECK_PARITY 3
+/* Additional compute_parity mode -- updates the parity w/o LOCKING */
+#define UPDATE_PARITY 4
+
+/*
+ * Stripe state
+ */
+#define STRIPE_HANDLE 2
+#define STRIPE_SYNCING 3
+#define STRIPE_INSYNC 4
+#define STRIPE_PREREAD_ACTIVE 5
+#define STRIPE_DELAYED 6
+#define STRIPE_DEGRADED 7
+#define STRIPE_BIT_DELAY 8
+#define STRIPE_EXPANDING 9
+#define STRIPE_EXPAND_SOURCE 10
+#define STRIPE_EXPAND_READY 11
+#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */
+#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
+#define STRIPE_BIOFILL_RUN 14
+#define STRIPE_COMPUTE_RUN 15
+/*
+ * Operation request flags
+ */
+#define STRIPE_OP_BIOFILL 0
+#define STRIPE_OP_COMPUTE_BLK 1
+#define STRIPE_OP_PREXOR 2
+#define STRIPE_OP_BIODRAIN 3
+#define STRIPE_OP_POSTXOR 4
+#define STRIPE_OP_CHECK 5
+
+/*
+ * Plugging:
+ *
+ * To improve write throughput, we need to delay the handling of some
+ * stripes until there has been a chance that several write requests
+ * for the one stripe have all been collected.
+ * In particular, any write request that would require pre-reading
+ * is put on a "delayed" queue until there are no stripes currently
+ * in a pre-read phase. Further, if the "delayed" queue is empty when
+ * a stripe is put on it then we "plug" the queue and do not process it
+ * until an unplug call is made. (the unplug_io_fn() is called).
+ *
+ * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add
+ * it to the count of prereading stripes.
+ * When write is initiated, or the stripe refcnt == 0 (just in case) we
+ * clear the PREREAD_ACTIVE flag and decrement the count
+ * Whenever the 'handle' queue is empty and the device is not plugged, we
+ * move any strips from delayed to handle and clear the DELAYED flag and set
+ * PREREAD_ACTIVE.
+ * In stripe_handle, if we find pre-reading is necessary, we do it if
+ * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
+ * HANDLE gets cleared if stripe_handle leave nothing locked.
+ */
+
+
+struct disk_info {
+ mdk_rdev_t *rdev;
+};
+
+struct raid5_private_data {
+ struct hlist_head *stripe_hashtbl;
+ mddev_t *mddev;
+ struct disk_info *spare;
+ int chunk_size, level, algorithm;
+ int max_degraded;
+ int raid_disks;
+ int max_nr_stripes;
+
+ /* reshape_progress is the leading edge of a 'reshape'
+ * It has value MaxSector when no reshape is happening
+ * If delta_disks < 0, it is the last sector we started work on,
+ * else is it the next sector to work on.
+ */
+ sector_t reshape_progress;
+ /* reshape_safe is the trailing edge of a reshape. We know that
+ * before (or after) this address, all reshape has completed.
+ */
+ sector_t reshape_safe;
+ int previous_raid_disks;
+ int prev_chunk, prev_algo;
+ short generation; /* increments with every reshape */
+ unsigned long reshape_checkpoint; /* Time we last updated
+ * metadata */
+
+ struct list_head handle_list; /* stripes needing handling */
+ struct list_head hold_list; /* preread ready stripes */
+ struct list_head delayed_list; /* stripes that have plugged requests */
+ struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */
+ struct bio *retry_read_aligned; /* currently retrying aligned bios */
+ struct bio *retry_read_aligned_list; /* aligned bios retry list */
+ atomic_t preread_active_stripes; /* stripes with scheduled io */
+ atomic_t active_aligned_reads;
+ atomic_t pending_full_writes; /* full write backlog */
+ int bypass_count; /* bypassed prereads */
+ int bypass_threshold; /* preread nice */
+ struct list_head *last_hold; /* detect hold_list promotions */
+
+ atomic_t reshape_stripes; /* stripes with pending writes for reshape */
+ /* unfortunately we need two cache names as we temporarily have
+ * two caches.
+ */
+ int active_name;
+ char cache_name[2][20];
+ struct kmem_cache *slab_cache; /* for allocating stripes */
+
+ int seq_flush, seq_write;
+ int quiesce;
+
+ int fullsync; /* set to 1 if a full sync is needed,
+ * (fresh device added).
+ * Cleared when a sync completes.
+ */
+
+ struct page *spare_page; /* Used when checking P/Q in raid6 */
+
+ /*
+ * Free stripes pool
+ */
+ atomic_t active_stripes;
+ struct list_head inactive_list;
+ wait_queue_head_t wait_for_stripe;
+ wait_queue_head_t wait_for_overlap;
+ int inactive_blocked; /* release of inactive stripes blocked,
+ * waiting for 25% to be free
+ */
+ int pool_size; /* number of disks in stripeheads in pool */
+ spinlock_t device_lock;
+ struct disk_info *disks;
+
+ /* When taking over an array from a different personality, we store
+ * the new thread here until we fully activate the array.
+ */
+ struct mdk_thread_s *thread;
+};
+
+typedef struct raid5_private_data raid5_conf_t;
+
+#define mddev_to_conf(mddev) ((raid5_conf_t *) mddev->private)
+
+/*
+ * Our supported algorithms
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */
+#define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */
+#define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */
+#define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */
+
+/* Define non-rotating (raid4) algorithms. These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, or order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
+
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6 16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
+#define ALGORITHM_LEFT_SYMMETRIC_6 18
+#define ALGORITHM_RIGHT_SYMMETRIC_6 19
+#define ALGORITHM_PARITY_0_6 20
+#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
+
+static inline int algorithm_valid_raid5(int layout)
+{
+ return (layout >= 0) &&
+ (layout <= 5);
+}
+static inline int algorithm_valid_raid6(int layout)
+{
+ return (layout >= 0 && layout <= 5)
+ ||
+ (layout == 8 || layout == 10)
+ ||
+ (layout >= 16 && layout <= 20);
+}
+
+static inline int algorithm_is_DDF(int layout)
+{
+ return layout >= 8 && layout <= 10;
+}
+#endif
diff --git a/drivers/md/raid6.h b/drivers/md/raid6.h
deleted file mode 100644
index 98dcde88470..00000000000
--- a/drivers/md/raid6.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * Copyright 2003 H. Peter Anvin - All Rights Reserved
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
- * (at your option) any later version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-#ifndef LINUX_RAID_RAID6_H
-#define LINUX_RAID_RAID6_H
-
-#ifdef __KERNEL__
-
-/* Set to 1 to use kernel-wide empty_zero_page */
-#define RAID6_USE_EMPTY_ZERO_PAGE 0
-
-#include <linux/raid/md.h>
-#include <linux/raid/raid5.h>
-
-typedef raid5_conf_t raid6_conf_t; /* Same configuration */
-
-/* Additional compute_parity mode -- updates the parity w/o LOCKING */
-#define UPDATE_PARITY 4
-
-/* We need a pre-zeroed page... if we don't want to use the kernel-provided
- one define it here */
-#if RAID6_USE_EMPTY_ZERO_PAGE
-# define raid6_empty_zero_page empty_zero_page
-#else
-extern const char raid6_empty_zero_page[PAGE_SIZE];
-#endif
-
-#else /* ! __KERNEL__ */
-/* Used for testing in user space */
-
-#include <errno.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <stddef.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-
-/* Not standard, but glibc defines it */
-#define BITS_PER_LONG __WORDSIZE
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-#ifndef PAGE_SIZE
-# define PAGE_SIZE 4096
-#endif
-extern const char raid6_empty_zero_page[PAGE_SIZE];
-
-#define __init
-#define __exit
-#define __attribute_const__ __attribute__((const))
-#define noinline __attribute__((noinline))
-
-#define preempt_enable()
-#define preempt_disable()
-#define cpu_has_feature(x) 1
-#define enable_kernel_altivec()
-#define disable_kernel_altivec()
-
-#endif /* __KERNEL__ */
-
-/* Routine choices */
-struct raid6_calls {
- void (*gen_syndrome)(int, size_t, void **);
- int (*valid)(void); /* Returns 1 if this routine set is usable */
- const char *name; /* Name of this routine set */
- int prefer; /* Has special performance attribute */
-};
-
-/* Selected algorithm */
-extern struct raid6_calls raid6_call;
-
-/* Algorithm list */
-extern const struct raid6_calls * const raid6_algos[];
-int raid6_select_algo(void);
-
-/* Return values from chk_syndrome */
-#define RAID6_OK 0
-#define RAID6_P_BAD 1
-#define RAID6_Q_BAD 2
-#define RAID6_PQ_BAD 3
-
-/* Galois field tables */
-extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
-extern const u8 raid6_gfexp[256] __attribute__((aligned(256)));
-extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
-extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));
-
-/* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
-void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
-
-/* Some definitions to allow code to be compiled for testing in userspace */
-#ifndef __KERNEL__
-
-# define jiffies raid6_jiffies()
-# define printk printf
-# define GFP_KERNEL 0
-# define __get_free_pages(x,y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0))
-# define free_pages(x,y) munmap((void *)(x), (y)*PAGE_SIZE)
-
-static inline void cpu_relax(void)
-{
- /* Nothing */
-}
-
-#undef HZ
-#define HZ 1000
-static inline uint32_t raid6_jiffies(void)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return tv.tv_sec*1000 + tv.tv_usec/1000;
-}
-
-#endif /* ! __KERNEL__ */
-
-#endif /* LINUX_RAID_RAID6_H */
diff --git a/drivers/md/raid6algos.c b/drivers/md/raid6algos.c
index 21987e3dbe6..866215ac7f2 100644
--- a/drivers/md/raid6algos.c
+++ b/drivers/md/raid6algos.c
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -16,13 +16,20 @@
* Algorithm list and algorithm selection for RAID-6
*/
-#include "raid6.h"
+#include <linux/raid/pq.h>
#ifndef __KERNEL__
#include <sys/mman.h>
#include <stdio.h>
+#else
+#if !RAID6_USE_EMPTY_ZERO_PAGE
+/* In .bss so it's zeroed */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+EXPORT_SYMBOL(raid6_empty_zero_page);
+#endif
#endif
struct raid6_calls raid6_call;
+EXPORT_SYMBOL_GPL(raid6_call);
/* Various routine sets */
extern const struct raid6_calls raid6_intx1;
@@ -79,6 +86,7 @@ const struct raid6_calls * const raid6_algos[] = {
#else
/* Need more time to be stable in userspace */
#define RAID6_TIME_JIFFIES_LG2 9
+#define time_before(x, y) ((x) < (y))
#endif
/* Try to pick the best algorithm */
@@ -152,3 +160,12 @@ int __init raid6_select_algo(void)
return best ? 0 : -EINVAL;
}
+
+static void raid6_exit(void)
+{
+ do { } while (0);
+}
+
+subsys_initcall(raid6_select_algo);
+module_exit(raid6_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/raid6altivec.uc b/drivers/md/raid6altivec.uc
index b9afd35b881..699dfeee494 100644
--- a/drivers/md/raid6altivec.uc
+++ b/drivers/md/raid6altivec.uc
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -22,7 +22,7 @@
* bracked this with preempt_disable/enable or in a lock)
*/
-#include "raid6.h"
+#include <linux/raid/pq.h>
#ifdef CONFIG_ALTIVEC
diff --git a/drivers/md/raid6int.uc b/drivers/md/raid6int.uc
index ad004cee0e2..f9bf9cba357 100644
--- a/drivers/md/raid6int.uc
+++ b/drivers/md/raid6int.uc
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
* This file is postprocessed using unroll.pl
*/
-#include "raid6.h"
+#include <linux/raid/pq.h>
/*
* This is the C data type to use
diff --git a/drivers/md/raid6mmx.c b/drivers/md/raid6mmx.c
index d4e4a1bd70a..e7f6c13132b 100644
--- a/drivers/md/raid6mmx.c
+++ b/drivers/md/raid6mmx.c
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
#if defined(__i386__) && !defined(__arch_um__)
-#include "raid6.h"
+#include <linux/raid/pq.h>
#include "raid6x86.h"
/* Shared with raid6sse1.c */
diff --git a/drivers/md/raid6recov.c b/drivers/md/raid6recov.c
index a8c4d9451bd..2609f00e0d6 100644
--- a/drivers/md/raid6recov.c
+++ b/drivers/md/raid6recov.c
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -18,7 +18,7 @@
* the syndrome.)
*/
-#include "raid6.h"
+#include <linux/raid/pq.h>
/* Recover two failed data blocks. */
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
@@ -63,9 +63,7 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
p++; q++;
}
}
-
-
-
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
/* Recover failure of one data block plus the P block */
void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
@@ -97,9 +95,10 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
q++; dq++;
}
}
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
-
-#ifndef __KERNEL__ /* Testing only */
+#ifndef __KERNEL__
+/* Testing only */
/* Recover two failed blocks. */
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
diff --git a/drivers/md/raid6sse1.c b/drivers/md/raid6sse1.c
index 0666237276f..b274dd5eab8 100644
--- a/drivers/md/raid6sse1.c
+++ b/drivers/md/raid6sse1.c
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -23,7 +23,7 @@
#if defined(__i386__) && !defined(__arch_um__)
-#include "raid6.h"
+#include <linux/raid/pq.h>
#include "raid6x86.h"
/* Defined in raid6mmx.c */
diff --git a/drivers/md/raid6sse2.c b/drivers/md/raid6sse2.c
index b034ad86803..6ed6c6c0389 100644
--- a/drivers/md/raid6sse2.c
+++ b/drivers/md/raid6sse2.c
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
@@ -19,7 +19,7 @@
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
-#include "raid6.h"
+#include <linux/raid/pq.h>
#include "raid6x86.h"
static const struct raid6_sse_constants {
diff --git a/drivers/md/raid6test/Makefile b/drivers/md/raid6test/Makefile
index 78e0396adf2..58ffdf4f516 100644
--- a/drivers/md/raid6test/Makefile
+++ b/drivers/md/raid6test/Makefile
@@ -5,7 +5,7 @@
CC = gcc
OPTFLAGS = -O2 # Adjust as desired
-CFLAGS = -I.. -g $(OPTFLAGS)
+CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
LD = ld
PERL = perl
AR = ar
diff --git a/drivers/md/raid6test/test.c b/drivers/md/raid6test/test.c
index 559cc41b258..7a930318b17 100644
--- a/drivers/md/raid6test/test.c
+++ b/drivers/md/raid6test/test.c
@@ -17,7 +17,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "raid6.h"
+#include <linux/raid/pq.h>
#define NDISKS 16 /* Including P and Q */
diff --git a/drivers/md/raid6x86.h b/drivers/md/raid6x86.h
index 99fea7a70ca..4c22c156855 100644
--- a/drivers/md/raid6x86.h
+++ b/drivers/md/raid6x86.h
@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- * Bostom MA 02111-1307, USA; either version 2 of the License, or
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
diff --git a/drivers/mfd/twl4030-core.c b/drivers/mfd/twl4030-core.c
index 68826f1e36b..ec90e953adc 100644
--- a/drivers/mfd/twl4030-core.c
+++ b/drivers/mfd/twl4030-core.c
@@ -592,11 +592,9 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
/* maybe add LDOs that are omitted on cost-reduced parts */
if (twl_has_regulator() && !(features & TPS_SUBSET)) {
- /*
child = add_regulator(TWL4030_REG_VPLL2, pdata->vpll2);
if (IS_ERR(child))
return PTR_ERR(child);
- */
child = add_regulator(TWL4030_REG_VMMC2, pdata->vmmc2);
if (IS_ERR(child))
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index df6ce4a06cf..1445ea8f10a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -21,6 +21,7 @@
#include <linux/leds.h>
#include <linux/scatterlist.h>
#include <linux/log2.h>
+#include <linux/regulator/consumer.h>
#include <linux/mmc/card.h>
#include <linux/mmc/host.h>
@@ -523,6 +524,105 @@ u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max)
}
EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
+#ifdef CONFIG_REGULATOR
+
+/**
+ * mmc_regulator_get_ocrmask - return mask of supported voltages
+ * @supply: regulator to use
+ *
+ * This returns either a negative errno, or a mask of voltages that
+ * can be provided to MMC/SD/SDIO devices using the specified voltage
+ * regulator. This would normally be called before registering the
+ * MMC host adapter.
+ */
+int mmc_regulator_get_ocrmask(struct regulator *supply)
+{
+ int result = 0;
+ int count;
+ int i;
+
+ count = regulator_count_voltages(supply);
+ if (count < 0)
+ return count;
+
+ for (i = 0; i < count; i++) {
+ int vdd_uV;
+ int vdd_mV;
+
+ vdd_uV = regulator_list_voltage(supply, i);
+ if (vdd_uV <= 0)
+ continue;
+
+ vdd_mV = vdd_uV / 1000;
+ result |= mmc_vddrange_to_ocrmask(vdd_mV, vdd_mV);
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(mmc_regulator_get_ocrmask);
+
+/**
+ * mmc_regulator_set_ocr - set regulator to match host->ios voltage
+ * @vdd_bit: zero for power off, else a bit number (host->ios.vdd)
+ * @supply: regulator to use
+ *
+ * Returns zero on success, else negative errno.
+ *
+ * MMC host drivers may use this to enable or disable a regulator using
+ * a particular supply voltage. This would normally be called from the
+ * set_ios() method.
+ */
+int mmc_regulator_set_ocr(struct regulator *supply, unsigned short vdd_bit)
+{
+ int result = 0;
+ int min_uV, max_uV;
+ int enabled;
+
+ enabled = regulator_is_enabled(supply);
+ if (enabled < 0)
+ return enabled;
+
+ if (vdd_bit) {
+ int tmp;
+ int voltage;
+
+ /* REVISIT mmc_vddrange_to_ocrmask() may have set some
+ * bits this regulator doesn't quite support ... don't
+ * be too picky, most cards and regulators are OK with
+ * a 0.1V range goof (it's a small error percentage).
+ */
+ tmp = vdd_bit - ilog2(MMC_VDD_165_195);
+ if (tmp == 0) {
+ min_uV = 1650 * 1000;
+ max_uV = 1950 * 1000;
+ } else {
+ min_uV = 1900 * 1000 + tmp * 100 * 1000;
+ max_uV = min_uV + 100 * 1000;
+ }
+
+ /* avoid needless changes to this voltage; the regulator
+ * might not allow this operation
+ */
+ voltage = regulator_get_voltage(supply);
+ if (voltage < 0)
+ result = voltage;
+ else if (voltage < min_uV || voltage > max_uV)
+ result = regulator_set_voltage(supply, min_uV, max_uV);
+ else
+ result = 0;
+
+ if (result == 0 && !enabled)
+ result = regulator_enable(supply);
+ } else if (enabled) {
+ result = regulator_disable(supply);
+ }
+
+ return result;
+}
+EXPORT_SYMBOL(mmc_regulator_set_ocr);
+
+#endif
+
/*
* Mask off any voltages we don't support and select
* the lowest voltage
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index e9026cb1c5b..572d32fdf38 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -117,7 +117,7 @@ static int __init pxa2xx_flash_probe(struct platform_device *pdev)
return 0;
}
-static int __exit pxa2xx_flash_remove(struct platform_device *dev)
+static int __devexit pxa2xx_flash_remove(struct platform_device *dev)
{
struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
diff --git a/drivers/parisc/asp.c b/drivers/parisc/asp.c
index 7931133526c..9ca21098b14 100644
--- a/drivers/parisc/asp.c
+++ b/drivers/parisc/asp.c
@@ -81,7 +81,7 @@ static int __init asp_init_chip(struct parisc_device *dev)
asp.hpa = ASP_INTERRUPT_ADDR;
printk(KERN_INFO "%s version %d at 0x%lx found.\n",
- asp.name, asp.version, dev->hpa.start);
+ asp.name, asp.version, (unsigned long)dev->hpa.start);
/* the IRQ ASP should use */
ret = -EBUSY;
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index cd4dd7ed2c0..5d610cbcfe8 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -406,8 +406,6 @@ resource_found:
}
ioc->avg_search[ioc->avg_idx++] = cr_start;
ioc->avg_idx &= CCIO_SEARCH_SAMPLE - 1;
-#endif
-#ifdef CCIO_COLLECT_STATS
ioc->used_pages += pages_needed;
#endif
/*
@@ -453,10 +451,10 @@ ccio_free_range(struct ioc *ioc, dma_addr_t iova, unsigned long pages_mapped)
unsigned long mask = ~(~0UL >> pages_mapped);
CCIO_FREE_MAPPINGS(ioc, res_idx, mask, 8);
#else
- CCIO_FREE_MAPPINGS(ioc, res_idx, 0xff, 8);
+ CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffUL, 8);
#endif
} else if(pages_mapped <= 16) {
- CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffff, 16);
+ CCIO_FREE_MAPPINGS(ioc, res_idx, 0xffffUL, 16);
} else if(pages_mapped <= 32) {
CCIO_FREE_MAPPINGS(ioc, res_idx, ~(unsigned int)0, 32);
#ifdef __LP64__
@@ -1028,8 +1026,10 @@ static int ccio_proc_info(struct seq_file *m, void *p)
while (ioc != NULL) {
unsigned int total_pages = ioc->res_size << 3;
+#ifdef CCIO_COLLECT_STATS
unsigned long avg = 0, min, max;
int j;
+#endif
len += seq_printf(m, "%s\n", ioc->name);
@@ -1060,8 +1060,7 @@ static int ccio_proc_info(struct seq_file *m, void *p)
avg /= CCIO_SEARCH_SAMPLE;
len += seq_printf(m, " Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles)\n",
min, avg, max);
-#endif
-#ifdef CCIO_COLLECT_STATS
+
len += seq_printf(m, "pci_map_single(): %8ld calls %8ld pages (avg %d/1000)\n",
ioc->msingle_calls, ioc->msingle_pages,
(int)((ioc->msingle_pages * 1000)/ioc->msingle_calls));
@@ -1400,7 +1399,7 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr)
result = insert_resource(&iomem_resource, res);
if (result < 0) {
printk(KERN_ERR "%s() failed to claim CCIO bus address space (%08lx,%08lx)\n",
- __func__, res->start, res->end);
+ __func__, (unsigned long)res->start, (unsigned long)res->end);
}
}
@@ -1551,7 +1550,8 @@ static int __init ccio_probe(struct parisc_device *dev)
ioc->name = dev->id.hversion == U2_IOA_RUNWAY ? "U2" : "UTurn";
- printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name, dev->hpa.start);
+ printk(KERN_INFO "Found %s at 0x%lx\n", ioc->name,
+ (unsigned long)dev->hpa.start);
for (i = 0; i < ioc_count; i++) {
ioc_p = &(*ioc_p)->next;
diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c
index bb5a1c9597c..52ae0b1d470 100644
--- a/drivers/parisc/dino.c
+++ b/drivers/parisc/dino.c
@@ -819,7 +819,9 @@ dino_bridge_init(struct dino_device *dino_dev, const char *name)
result = ccio_request_resource(dino_dev->hba.dev, &res[i]);
if (result < 0) {
- printk(KERN_ERR "%s: failed to claim PCI Bus address space %d (0x%lx-0x%lx)!\n", name, i, res[i].start, res[i].end);
+ printk(KERN_ERR "%s: failed to claim PCI Bus address "
+ "space %d (0x%lx-0x%lx)!\n", name, i,
+ (unsigned long)res[i].start, (unsigned long)res[i].end);
return result;
}
}
@@ -899,7 +901,8 @@ static int __init dino_common_init(struct parisc_device *dev,
if (request_resource(&ioport_resource, res) < 0) {
printk(KERN_ERR "%s: request I/O Port region failed "
"0x%lx/%lx (hpa 0x%p)\n",
- name, res->start, res->end, dino_dev->hba.base_addr);
+ name, (unsigned long)res->start, (unsigned long)res->end,
+ dino_dev->hba.base_addr);
return 1;
}
diff --git a/drivers/parisc/eisa.c b/drivers/parisc/eisa.c
index 7891db50c48..f415fdd9a88 100644
--- a/drivers/parisc/eisa.c
+++ b/drivers/parisc/eisa.c
@@ -314,7 +314,7 @@ static int __init eisa_probe(struct parisc_device *dev)
char *name = is_mongoose(dev) ? "Mongoose" : "Wax";
printk(KERN_INFO "%s EISA Adapter found at 0x%08lx\n",
- name, dev->hpa.start);
+ name, (unsigned long)dev->hpa.start);
eisa_dev.hba.dev = dev;
eisa_dev.hba.iommu = ccio_get_iommu(dev);
diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c
index 6d8aae003f6..c709ecc2b7f 100644
--- a/drivers/parisc/eisa_enumerator.c
+++ b/drivers/parisc/eisa_enumerator.c
@@ -98,7 +98,7 @@ static int configure_memory(const unsigned char *buf,
res->start = mem_parent->start + get_24(buf+len+2);
res->end = res->start + get_16(buf+len+5)*1024;
res->flags = IORESOURCE_MEM;
- printk("memory %lx-%lx ", res->start, res->end);
+ printk("memory %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
result = request_resource(mem_parent, res);
if (result < 0) {
printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
@@ -188,7 +188,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent,
res->start = get_16(buf+len+1);
res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1;
res->flags = IORESOURCE_IO;
- printk("ioports %lx-%lx ", res->start, res->end);
+ printk("ioports %lx-%lx ", (unsigned long)res->start, (unsigned long)res->end);
result = request_resource(io_parent, res);
if (result < 0) {
printk("\n" KERN_ERR "EISA Enumerator: failed to claim EISA Bus address space!\n");
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index 501aaf1f253..73348c4047e 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -714,7 +714,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
if (dest_cpu < 0)
return;
- irq_desc[irq].affinity = cpumask_of_cpu(dest_cpu);
+ cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
spin_lock_irqsave(&iosapic_lock, flags);
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 454b6532e40..9581d361945 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -3,7 +3,7 @@
*
* (c) Copyright 2000 Red Hat Software
* (c) Copyright 2000 Helge Deller <hdeller@redhat.com>
- * (c) Copyright 2001-2005 Helge Deller <deller@gmx.de>
+ * (c) Copyright 2001-2009 Helge Deller <deller@gmx.de>
* (c) Copyright 2001 Randolph Chung <tausq@debian.org>
*
* This program is free software; you can redistribute it and/or modify
@@ -243,13 +243,11 @@ static int __init led_create_procfs(void)
proc_pdc_root = proc_mkdir("pdc", 0);
if (!proc_pdc_root) return -1;
- proc_pdc_root->owner = THIS_MODULE;
ent = create_proc_entry("led", S_IFREG|S_IRUGO|S_IWUSR, proc_pdc_root);
if (!ent) return -1;
ent->data = (void *)LED_NOLCD; /* LED */
ent->read_proc = led_proc_read;
ent->write_proc = led_proc_write;
- ent->owner = THIS_MODULE;
if (led_type == LED_HASLCD)
{
@@ -258,7 +256,6 @@ static int __init led_create_procfs(void)
ent->data = (void *)LED_HASLCD; /* LCD */
ent->read_proc = led_proc_read;
ent->write_proc = led_proc_write;
- ent->owner = THIS_MODULE;
}
return 0;
@@ -463,9 +460,20 @@ static void led_work_func (struct work_struct *unused)
if (likely(led_lanrxtx)) currentleds |= led_get_net_activity();
if (likely(led_diskio)) currentleds |= led_get_diskio_activity();
- /* blink all LEDs twice a second if we got an Oops (HPMC) */
- if (unlikely(oops_in_progress))
- currentleds = (count_HZ<=(HZ/2)) ? 0 : 0xff;
+ /* blink LEDs if we got an Oops (HPMC) */
+ if (unlikely(oops_in_progress)) {
+ if (boot_cpu_data.cpu_type >= pcxl2) {
+ /* newer machines don't have loadavg. LEDs, so we
+ * let all LEDs blink twice per second instead */
+ currentleds = (count_HZ <= (HZ/2)) ? 0 : 0xff;
+ } else {
+ /* old machines: blink loadavg. LEDs twice per second */
+ if (count_HZ <= (HZ/2))
+ currentleds &= ~(LED4|LED5|LED6|LED7);
+ else
+ currentleds |= (LED4|LED5|LED6|LED7);
+ }
+ }
if (currentleds != lastleds)
{
@@ -511,7 +519,7 @@ static int led_halt(struct notifier_block *nb, unsigned long event, void *buf)
/* Cancel the work item and delete the queue */
if (led_wq) {
- cancel_rearming_delayed_workqueue(led_wq, &led_task);
+ cancel_delayed_work_sync(&led_task);
destroy_workqueue(led_wq);
led_wq = NULL;
}
@@ -630,7 +638,7 @@ int lcd_print( const char *str )
/* temporarily disable the led work task */
if (led_wq)
- cancel_rearming_delayed_workqueue(led_wq, &led_task);
+ cancel_delayed_work_sync(&led_task);
/* copy display string to buffer for procfs */
strlcpy(lcd_text, str, sizeof(lcd_text));
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 9dbd5066aca..23e56a564e0 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -164,7 +164,8 @@ static inline void context_clear_entry(struct context_entry *context)
* 1: writable
* 2-6: reserved
* 7: super page
- * 8-11: available
+ * 8-10: available
+ * 11: snoop behavior
* 12-63: Host physcial address
*/
struct dma_pte {
@@ -186,6 +187,11 @@ static inline void dma_set_pte_writable(struct dma_pte *pte)
pte->val |= DMA_PTE_WRITE;
}
+static inline void dma_set_pte_snp(struct dma_pte *pte)
+{
+ pte->val |= DMA_PTE_SNP;
+}
+
static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
pte->val = (pte->val & ~3) | (prot & 3);
@@ -231,6 +237,7 @@ struct dmar_domain {
int flags; /* flags to find out type of domain */
int iommu_coherency;/* indicate coherency of iommu access */
+ int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
@@ -421,7 +428,6 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
return g_iommus[iommu_id];
}
-/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
int i;
@@ -438,6 +444,29 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
}
}
+static void domain_update_iommu_snooping(struct dmar_domain *domain)
+{
+ int i;
+
+ domain->iommu_snooping = 1;
+
+ i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
+ for (; i < g_num_of_iommus; ) {
+ if (!ecap_sc_support(g_iommus[i]->ecap)) {
+ domain->iommu_snooping = 0;
+ break;
+ }
+ i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
+ }
+}
+
+/* Some capabilities may be different across iommus */
+static void domain_update_iommu_cap(struct dmar_domain *domain)
+{
+ domain_update_iommu_coherency(domain);
+ domain_update_iommu_snooping(domain);
+}
+
static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
struct dmar_drhd_unit *drhd = NULL;
@@ -689,15 +718,17 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
int addr_width = agaw_to_width(domain->agaw);
+ int npages;
start &= (((u64)1) << addr_width) - 1;
end &= (((u64)1) << addr_width) - 1;
/* in case it's partial page */
start = PAGE_ALIGN(start);
end &= PAGE_MASK;
+ npages = (end - start) / VTD_PAGE_SIZE;
/* we don't need lock here, nobody else touches the iova range */
- while (start < end) {
+ while (npages--) {
dma_pte_clear_one(domain, start);
start += VTD_PAGE_SIZE;
}
@@ -1241,6 +1272,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_coherency = 0;
+ if (ecap_sc_support(iommu->ecap))
+ domain->iommu_snooping = 1;
+ else
+ domain->iommu_snooping = 0;
+
domain->iommu_count = 1;
/* always allocate the top pgd */
@@ -1369,7 +1405,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
return 0;
@@ -1469,6 +1505,8 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
BUG_ON(dma_pte_addr(pte));
dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
dma_set_pte_prot(pte, prot);
+ if (prot & DMA_PTE_SNP)
+ dma_set_pte_snp(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
start_pfn++;
index++;
@@ -2119,7 +2157,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
error:
if (iova)
__free_iova(&domain->iovad, iova);
- printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
+ printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
pci_name(pdev), size, (unsigned long long)paddr, dir);
return 0;
}
@@ -2218,7 +2256,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
start_addr = iova->pfn_lo << PAGE_SHIFT;
size = aligned_size((u64)dev_addr, size);
- pr_debug("Device %s unmapping: %lx@%llx\n",
+ pr_debug("Device %s unmapping: %zx@%llx\n",
pci_name(pdev), size, (unsigned long long)start_addr);
/* clear the whole page */
@@ -2282,8 +2320,6 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
free_pages((unsigned long)vaddr, order);
}
-#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
-
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
int nelems, enum dma_data_direction dir,
struct dma_attrs *attrs)
@@ -2294,7 +2330,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
unsigned long start_addr;
struct iova *iova;
size_t size = 0;
- void *addr;
+ phys_addr_t addr;
struct scatterlist *sg;
struct intel_iommu *iommu;
@@ -2310,7 +2346,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
if (!iova)
return;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2337,7 +2373,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
for_each_sg(sglist, sg, nelems, i) {
BUG_ON(!sg_page(sg));
- sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
+ sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
}
return nelems;
@@ -2346,7 +2382,7 @@ static int intel_nontranslate_map_sg(struct device *hddev,
static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
- void *addr;
+ phys_addr_t addr;
int i;
struct pci_dev *pdev = to_pci_dev(hwdev);
struct dmar_domain *domain;
@@ -2370,8 +2406,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
iommu = domain_get_iommu(domain);
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size += aligned_size((u64)addr, sg->length);
}
@@ -2394,8 +2429,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
start_addr = iova->pfn_lo << PAGE_SHIFT;
offset = 0;
for_each_sg(sglist, sg, nelems, i) {
- addr = SG_ENT_VIRT_ADDRESS(sg);
- addr = (void *)virt_to_phys(addr);
+ addr = page_to_phys(sg_page(sg)) + sg->offset;
size = aligned_size((u64)addr, sg->length);
ret = domain_page_mapping(domain, start_addr + offset,
((u64)addr) & PAGE_MASK,
@@ -2628,6 +2662,33 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain,
return 0;
}
+static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
+ struct pci_dev *pdev)
+{
+ struct pci_dev *tmp, *parent;
+
+ if (!iommu || !pdev)
+ return;
+
+ /* dependent device detach */
+ tmp = pci_find_upstream_pcie_bridge(pdev);
+ /* Secondary interface's bus number and devfn 0 */
+ if (tmp) {
+ parent = pdev->bus->self;
+ while (parent != tmp) {
+ iommu_detach_dev(iommu, parent->bus->number,
+ parent->devfn);
+ parent = parent->bus->self;
+ }
+ if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ iommu_detach_dev(iommu,
+ tmp->subordinate->number, 0);
+ else /* this is a legacy PCI bridge */
+ iommu_detach_dev(iommu,
+ tmp->bus->number, tmp->devfn);
+ }
+}
+
static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
struct pci_dev *pdev)
{
@@ -2653,6 +2714,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, pdev);
free_devinfo_mem(info);
spin_lock_irqsave(&device_domain_lock, flags);
@@ -2676,7 +2738,7 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
clear_bit(iommu->seq_id, &domain->iommu_bmp);
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
}
@@ -2702,15 +2764,16 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
iommu = device_to_iommu(info->bus, info->devfn);
iommu_detach_dev(iommu, info->bus, info->devfn);
+ iommu_detach_dependent_devices(iommu, info->dev);
/* clear this iommu in iommu_bmp, update iommu count
- * and coherency
+ * and capabilities
*/
spin_lock_irqsave(&domain->iommu_lock, flags2);
if (test_and_clear_bit(iommu->seq_id,
&domain->iommu_bmp)) {
domain->iommu_count--;
- domain_update_iommu_coherency(domain);
+ domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags2);
@@ -2933,6 +2996,8 @@ static int intel_iommu_map_range(struct iommu_domain *domain,
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
prot |= DMA_PTE_WRITE;
+ if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+ prot |= DMA_PTE_SNP;
max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
if (dmar_domain->max_addr < max_addr) {
@@ -2986,6 +3051,17 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
return phys;
}
+static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ struct dmar_domain *dmar_domain = domain->priv;
+
+ if (cap == IOMMU_CAP_CACHE_COHERENCY)
+ return dmar_domain->iommu_snooping;
+
+ return 0;
+}
+
static struct iommu_ops intel_iommu_ops = {
.domain_init = intel_iommu_domain_init,
.domain_destroy = intel_iommu_domain_destroy,
@@ -2994,6 +3070,7 @@ static struct iommu_ops intel_iommu_ops = {
.map = intel_iommu_map_range,
.unmap = intel_iommu_unmap_range,
.iova_to_phys = intel_iommu_iova_to_phys,
+ .domain_has_cap = intel_iommu_domain_has_cap,
};
static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
diff --git a/drivers/pcmcia/pxa2xx_cm_x255.c b/drivers/pcmcia/pxa2xx_cm_x255.c
index 4ed64d8e95e..5143a760153 100644
--- a/drivers/pcmcia/pxa2xx_cm_x255.c
+++ b/drivers/pcmcia/pxa2xx_cm_x255.c
@@ -63,7 +63,7 @@ static void cmx255_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
struct pcmcia_state *state)
{
int cd = skt->nr ? GPIO_PCMCIA_S1_CD_VALID : GPIO_PCMCIA_S0_CD_VALID;
- int rdy = skt->nr ? GPIO_PCMCIA_S0_RDYINT : GPIO_PCMCIA_S1_RDYINT;
+ int rdy = skt->nr ? GPIO_PCMCIA_S1_RDYINT : GPIO_PCMCIA_S0_RDYINT;
state->detect = !gpio_get_value(cd);
state->ready = !!gpio_get_value(rdy);
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index e7e0cf102d6..e58c0ce65aa 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -29,8 +29,12 @@ config REGULATOR_DEBUG
Say yes here to enable debugging support.
config REGULATOR_FIXED_VOLTAGE
- tristate
+ tristate "Fixed voltage regulator support"
default n
+ help
+ This driver provides support for fixed voltage regulators,
+ useful for systems which use a combination of software
+ managed regulators and simple non-configurable regulators.
config REGULATOR_VIRTUAL_CONSUMER
tristate "Virtual regulator consumer support"
@@ -52,6 +56,13 @@ config REGULATOR_BQ24022
charging select between 100 mA and 500 mA charging current
limit.
+config REGULATOR_TWL4030
+ bool "TI TWL4030/TWL5030/TPS695x0 PMIC"
+ depends on TWL4030_CORE
+ help
+ This driver supports the voltage regulators provided by
+ this family of companion chips.
+
config REGULATOR_WM8350
tristate "Wolfson Microelectroncis WM8350 AudioPlus PMIC"
depends on MFD_WM8350
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 61b30c6ddec..bac133afc06 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
obj-$(CONFIG_REGULATOR_BQ24022) += bq24022.o
+obj-$(CONFIG_REGULATOR_TWL4030) += twl4030-regulator.o
obj-$(CONFIG_REGULATOR_WM8350) += wm8350-regulator.o
obj-$(CONFIG_REGULATOR_WM8400) += wm8400-regulator.o
obj-$(CONFIG_REGULATOR_DA903X) += da903x.o
diff --git a/drivers/regulator/bq24022.c b/drivers/regulator/bq24022.c
index c175e38a4cd..7ecb820ceeb 100644
--- a/drivers/regulator/bq24022.c
+++ b/drivers/regulator/bq24022.c
@@ -105,7 +105,8 @@ static int __init bq24022_probe(struct platform_device *pdev)
ret = gpio_direction_output(pdata->gpio_iset2, 0);
ret = gpio_direction_output(pdata->gpio_nce, 1);
- bq24022 = regulator_register(&bq24022_desc, &pdev->dev, pdata);
+ bq24022 = regulator_register(&bq24022_desc, &pdev->dev,
+ pdata->init_data, pdata);
if (IS_ERR(bq24022)) {
dev_dbg(&pdev->dev, "couldn't register regulator\n");
ret = PTR_ERR(bq24022);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f511a406fca..01f7702a805 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -28,33 +28,7 @@
static DEFINE_MUTEX(regulator_list_mutex);
static LIST_HEAD(regulator_list);
static LIST_HEAD(regulator_map_list);
-
-/*
- * struct regulator_dev
- *
- * Voltage / Current regulator class device. One for each regulator.
- */
-struct regulator_dev {
- struct regulator_desc *desc;
- int use_count;
-
- /* lists we belong to */
- struct list_head list; /* list of all regulators */
- struct list_head slist; /* list of supplied regulators */
-
- /* lists we own */
- struct list_head consumer_list; /* consumers we supply */
- struct list_head supply_list; /* regulators we supply */
-
- struct blocking_notifier_head notifier;
- struct mutex mutex; /* consumer lock */
- struct module *owner;
- struct device dev;
- struct regulation_constraints *constraints;
- struct regulator_dev *supply; /* for tree */
-
- void *reg_data; /* regulator_dev data */
-};
+static int has_full_constraints;
/*
* struct regulator_map
@@ -79,7 +53,6 @@ struct regulator {
int uA_load;
int min_uV;
int max_uV;
- int enabled; /* count of client enables */
char *supply_name;
struct device_attribute dev_attr;
struct regulator_dev *rdev;
@@ -312,6 +285,47 @@ static ssize_t regulator_state_show(struct device *dev,
}
static DEVICE_ATTR(state, 0444, regulator_state_show, NULL);
+static ssize_t regulator_status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct regulator_dev *rdev = dev_get_drvdata(dev);
+ int status;
+ char *label;
+
+ status = rdev->desc->ops->get_status(rdev);
+ if (status < 0)
+ return status;
+
+ switch (status) {
+ case REGULATOR_STATUS_OFF:
+ label = "off";
+ break;
+ case REGULATOR_STATUS_ON:
+ label = "on";
+ break;
+ case REGULATOR_STATUS_ERROR:
+ label = "error";
+ break;
+ case REGULATOR_STATUS_FAST:
+ label = "fast";
+ break;
+ case REGULATOR_STATUS_NORMAL:
+ label = "normal";
+ break;
+ case REGULATOR_STATUS_IDLE:
+ label = "idle";
+ break;
+ case REGULATOR_STATUS_STANDBY:
+ label = "standby";
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ return sprintf(buf, "%s\n", label);
+}
+static DEVICE_ATTR(status, 0444, regulator_status_show, NULL);
+
static ssize_t regulator_min_uA_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -678,6 +692,73 @@ static int set_machine_constraints(struct regulator_dev *rdev,
else
name = "regulator";
+ /* constrain machine-level voltage specs to fit
+ * the actual range supported by this regulator.
+ */
+ if (ops->list_voltage && rdev->desc->n_voltages) {
+ int count = rdev->desc->n_voltages;
+ int i;
+ int min_uV = INT_MAX;
+ int max_uV = INT_MIN;
+ int cmin = constraints->min_uV;
+ int cmax = constraints->max_uV;
+
+ /* it's safe to autoconfigure fixed-voltage supplies */
+ if (count == 1 && !cmin) {
+ cmin = INT_MIN;
+ cmax = INT_MAX;
+ }
+
+ /* voltage constraints are optional */
+ if ((cmin == 0) && (cmax == 0))
+ goto out;
+
+ /* else require explicit machine-level constraints */
+ if (cmin <= 0 || cmax <= 0 || cmax < cmin) {
+ pr_err("%s: %s '%s' voltage constraints\n",
+ __func__, "invalid", name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* initial: [cmin..cmax] valid, [min_uV..max_uV] not */
+ for (i = 0; i < count; i++) {
+ int value;
+
+ value = ops->list_voltage(rdev, i);
+ if (value <= 0)
+ continue;
+
+ /* maybe adjust [min_uV..max_uV] */
+ if (value >= cmin && value < min_uV)
+ min_uV = value;
+ if (value <= cmax && value > max_uV)
+ max_uV = value;
+ }
+
+ /* final: [min_uV..max_uV] valid iff constraints valid */
+ if (max_uV < min_uV) {
+ pr_err("%s: %s '%s' voltage constraints\n",
+ __func__, "unsupportable", name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* use regulator's subset of machine constraints */
+ if (constraints->min_uV < min_uV) {
+ pr_debug("%s: override '%s' %s, %d -> %d\n",
+ __func__, name, "min_uV",
+ constraints->min_uV, min_uV);
+ constraints->min_uV = min_uV;
+ }
+ if (constraints->max_uV > max_uV) {
+ pr_debug("%s: override '%s' %s, %d -> %d\n",
+ __func__, name, "max_uV",
+ constraints->max_uV, max_uV);
+ constraints->max_uV = max_uV;
+ }
+ }
+
rdev->constraints = constraints;
/* do we need to apply the constraint voltage */
@@ -695,10 +776,6 @@ static int set_machine_constraints(struct regulator_dev *rdev,
}
}
- /* are we enabled at boot time by firmware / bootloader */
- if (rdev->constraints->boot_on)
- rdev->use_count = 1;
-
/* do we need to setup our suspend state */
if (constraints->initial_state) {
ret = suspend_prepare(rdev, constraints->initial_state);
@@ -710,11 +787,27 @@ static int set_machine_constraints(struct regulator_dev *rdev,
}
}
- /* if always_on is set then turn the regulator on if it's not
- * already on. */
- if (constraints->always_on && ops->enable &&
- ((ops->is_enabled && !ops->is_enabled(rdev)) ||
- (!ops->is_enabled && !constraints->boot_on))) {
+ if (constraints->initial_mode) {
+ if (!ops->set_mode) {
+ printk(KERN_ERR "%s: no set_mode operation for %s\n",
+ __func__, name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ops->set_mode(rdev, constraints->initial_mode);
+ if (ret < 0) {
+ printk(KERN_ERR
+ "%s: failed to set initial mode for %s: %d\n",
+ __func__, name, ret);
+ goto out;
+ }
+ }
+
+ /* If the constraints say the regulator should be on at this point
+ * and we have control then make sure it is enabled.
+ */
+ if ((constraints->always_on || constraints->boot_on) && ops->enable) {
ret = ops->enable(rdev);
if (ret < 0) {
printk(KERN_ERR "%s: failed to enable %s\n",
@@ -817,6 +910,19 @@ static void unset_consumer_device_supply(struct regulator_dev *rdev,
}
}
+static void unset_regulator_supplies(struct regulator_dev *rdev)
+{
+ struct regulator_map *node, *n;
+
+ list_for_each_entry_safe(node, n, &regulator_map_list, list) {
+ if (rdev == node->regulator) {
+ list_del(&node->list);
+ kfree(node);
+ return;
+ }
+ }
+}
+
#define REG_STR_SIZE 32
static struct regulator *create_regulator(struct regulator_dev *rdev,
@@ -898,9 +1004,12 @@ overflow_err:
* @id: Supply name or regulator ID.
*
* Returns a struct regulator corresponding to the regulator producer,
- * or IS_ERR() condition containing errno. Use of supply names
- * configured via regulator_set_device_supply() is strongly
- * encouraged.
+ * or IS_ERR() condition containing errno.
+ *
+ * Use of supply names configured via regulator_set_device_supply() is
+ * strongly encouraged. It is recommended that the supply name used
+ * should match the name used for the supply and/or the relevant
+ * device pins in the datasheet.
*/
struct regulator *regulator_get(struct device *dev, const char *id)
{
@@ -922,8 +1031,6 @@ struct regulator *regulator_get(struct device *dev, const char *id)
goto found;
}
}
- printk(KERN_ERR "regulator: Unable to get requested regulator: %s\n",
- id);
mutex_unlock(&regulator_list_mutex);
return regulator;
@@ -961,10 +1068,6 @@ void regulator_put(struct regulator *regulator)
mutex_lock(&regulator_list_mutex);
rdev = regulator->rdev;
- if (WARN(regulator->enabled, "Releasing supply %s while enabled\n",
- regulator->supply_name))
- _regulator_disable(rdev);
-
/* remove any sysfs entries */
if (regulator->dev) {
sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name);
@@ -1039,12 +1142,7 @@ int regulator_enable(struct regulator *regulator)
int ret = 0;
mutex_lock(&rdev->mutex);
- if (regulator->enabled == 0)
- ret = _regulator_enable(rdev);
- else if (regulator->enabled < 0)
- ret = -EIO;
- if (ret == 0)
- regulator->enabled++;
+ ret = _regulator_enable(rdev);
mutex_unlock(&rdev->mutex);
return ret;
}
@@ -1055,6 +1153,11 @@ static int _regulator_disable(struct regulator_dev *rdev)
{
int ret = 0;
+ if (WARN(rdev->use_count <= 0,
+ "unbalanced disables for %s\n",
+ rdev->desc->name))
+ return -EIO;
+
/* are we the last user and permitted to disable ? */
if (rdev->use_count == 1 && !rdev->constraints->always_on) {
@@ -1103,16 +1206,7 @@ int regulator_disable(struct regulator *regulator)
int ret = 0;
mutex_lock(&rdev->mutex);
- if (regulator->enabled == 1) {
- ret = _regulator_disable(rdev);
- if (ret == 0)
- regulator->uA_load = 0;
- } else if (WARN(regulator->enabled <= 0,
- "unbalanced disables for supply %s\n",
- regulator->supply_name))
- ret = -EIO;
- if (ret == 0)
- regulator->enabled--;
+ ret = _regulator_disable(rdev);
mutex_unlock(&rdev->mutex);
return ret;
}
@@ -1159,7 +1253,6 @@ int regulator_force_disable(struct regulator *regulator)
int ret;
mutex_lock(&regulator->rdev->mutex);
- regulator->enabled = 0;
regulator->uA_load = 0;
ret = _regulator_force_disable(regulator->rdev);
mutex_unlock(&regulator->rdev->mutex);
@@ -1204,6 +1297,56 @@ int regulator_is_enabled(struct regulator *regulator)
EXPORT_SYMBOL_GPL(regulator_is_enabled);
/**
+ * regulator_count_voltages - count regulator_list_voltage() selectors
+ * @regulator: regulator source
+ *
+ * Returns number of selectors, or negative errno. Selectors are
+ * numbered starting at zero, and typically correspond to bitfields
+ * in hardware registers.
+ */
+int regulator_count_voltages(struct regulator *regulator)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+
+ return rdev->desc->n_voltages ? : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(regulator_count_voltages);
+
+/**
+ * regulator_list_voltage - enumerate supported voltages
+ * @regulator: regulator source
+ * @selector: identify voltage to list
+ * Context: can sleep
+ *
+ * Returns a voltage that can be passed to @regulator_set_voltage(),
+ * zero if this selector code can't be used on this sytem, or a
+ * negative errno.
+ */
+int regulator_list_voltage(struct regulator *regulator, unsigned selector)
+{
+ struct regulator_dev *rdev = regulator->rdev;
+ struct regulator_ops *ops = rdev->desc->ops;
+ int ret;
+
+ if (!ops->list_voltage || selector >= rdev->desc->n_voltages)
+ return -EINVAL;
+
+ mutex_lock(&rdev->mutex);
+ ret = ops->list_voltage(rdev, selector);
+ mutex_unlock(&rdev->mutex);
+
+ if (ret > 0) {
+ if (ret < rdev->constraints->min_uV)
+ ret = 0;
+ else if (ret > rdev->constraints->max_uV)
+ ret = 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_list_voltage);
+
+/**
* regulator_set_voltage - set regulator output voltage
* @regulator: regulator source
* @min_uV: Minimum required voltage in uV
@@ -1243,6 +1386,7 @@ int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV)
ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV);
out:
+ _notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE, NULL);
mutex_unlock(&rdev->mutex);
return ret;
}
@@ -1543,20 +1687,23 @@ int regulator_unregister_notifier(struct regulator *regulator,
}
EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
-/* notify regulator consumers and downstream regulator consumers */
+/* notify regulator consumers and downstream regulator consumers.
+ * Note mutex must be held by caller.
+ */
static void _notifier_call_chain(struct regulator_dev *rdev,
unsigned long event, void *data)
{
struct regulator_dev *_rdev;
/* call rdev chain first */
- mutex_lock(&rdev->mutex);
blocking_notifier_call_chain(&rdev->notifier, event, NULL);
- mutex_unlock(&rdev->mutex);
/* now notify regulator we supply */
- list_for_each_entry(_rdev, &rdev->supply_list, slist)
- _notifier_call_chain(_rdev, event, data);
+ list_for_each_entry(_rdev, &rdev->supply_list, slist) {
+ mutex_lock(&_rdev->mutex);
+ _notifier_call_chain(_rdev, event, data);
+ mutex_unlock(&_rdev->mutex);
+ }
}
/**
@@ -1703,6 +1850,7 @@ EXPORT_SYMBOL_GPL(regulator_bulk_free);
*
* Called by regulator drivers to notify clients a regulator event has
* occurred. We also notify regulator clients downstream.
+ * Note lock must be held by caller.
*/
int regulator_notifier_call_chain(struct regulator_dev *rdev,
unsigned long event, void *data)
@@ -1744,6 +1892,11 @@ static int add_regulator_attributes(struct regulator_dev *rdev)
if (status < 0)
return status;
}
+ if (ops->get_status) {
+ status = device_create_file(dev, &dev_attr_status);
+ if (status < 0)
+ return status;
+ }
/* some attributes are type-specific */
if (rdev->desc->type == REGULATOR_CURRENT) {
@@ -1828,17 +1981,18 @@ static int add_regulator_attributes(struct regulator_dev *rdev)
* regulator_register - register regulator
* @regulator_desc: regulator to register
* @dev: struct device for the regulator
+ * @init_data: platform provided init data, passed through by driver
* @driver_data: private regulator data
*
* Called by regulator drivers to register a regulator.
* Returns 0 on success.
*/
struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
- struct device *dev, void *driver_data)
+ struct device *dev, struct regulator_init_data *init_data,
+ void *driver_data)
{
static atomic_t regulator_no = ATOMIC_INIT(0);
struct regulator_dev *rdev;
- struct regulator_init_data *init_data = dev->platform_data;
int ret, i;
if (regulator_desc == NULL)
@@ -1945,6 +2099,7 @@ void regulator_unregister(struct regulator_dev *rdev)
return;
mutex_lock(&regulator_list_mutex);
+ unset_regulator_supplies(rdev);
list_del(&rdev->list);
if (rdev->supply)
sysfs_remove_link(&rdev->dev.kobj, "supply");
@@ -1989,6 +2144,23 @@ out:
EXPORT_SYMBOL_GPL(regulator_suspend_prepare);
/**
+ * regulator_has_full_constraints - the system has fully specified constraints
+ *
+ * Calling this function will cause the regulator API to disable all
+ * regulators which have a zero use count and don't have an always_on
+ * constraint in a late_initcall.
+ *
+ * The intention is that this will become the default behaviour in a
+ * future kernel release so users are encouraged to use this facility
+ * now.
+ */
+void regulator_has_full_constraints(void)
+{
+ has_full_constraints = 1;
+}
+EXPORT_SYMBOL_GPL(regulator_has_full_constraints);
+
+/**
* rdev_get_drvdata - get rdev regulator driver data
* @rdev: regulator
*
@@ -2055,3 +2227,77 @@ static int __init regulator_init(void)
/* init early to allow our consumers to complete system booting */
core_initcall(regulator_init);
+
+static int __init regulator_init_complete(void)
+{
+ struct regulator_dev *rdev;
+ struct regulator_ops *ops;
+ struct regulation_constraints *c;
+ int enabled, ret;
+ const char *name;
+
+ mutex_lock(&regulator_list_mutex);
+
+ /* If we have a full configuration then disable any regulators
+ * which are not in use or always_on. This will become the
+ * default behaviour in the future.
+ */
+ list_for_each_entry(rdev, &regulator_list, list) {
+ ops = rdev->desc->ops;
+ c = rdev->constraints;
+
+ if (c->name)
+ name = c->name;
+ else if (rdev->desc->name)
+ name = rdev->desc->name;
+ else
+ name = "regulator";
+
+ if (!ops->disable || c->always_on)
+ continue;
+
+ mutex_lock(&rdev->mutex);
+
+ if (rdev->use_count)
+ goto unlock;
+
+ /* If we can't read the status assume it's on. */
+ if (ops->is_enabled)
+ enabled = ops->is_enabled(rdev);
+ else
+ enabled = 1;
+
+ if (!enabled)
+ goto unlock;
+
+ if (has_full_constraints) {
+ /* We log since this may kill the system if it
+ * goes wrong. */
+ printk(KERN_INFO "%s: disabling %s\n",
+ __func__, name);
+ ret = ops->disable(rdev);
+ if (ret != 0) {
+ printk(KERN_ERR
+ "%s: couldn't disable %s: %d\n",
+ __func__, name, ret);
+ }
+ } else {
+ /* The intention is that in future we will
+ * assume that full constraints are provided
+ * so warn even if we aren't going to do
+ * anything here.
+ */
+ printk(KERN_WARNING
+ "%s: incomplete constraints, leaving %s on\n",
+ __func__, name);
+ }
+
+unlock:
+ mutex_unlock(&rdev->mutex);
+ }
+
+ mutex_unlock(&regulator_list_mutex);
+
+ return 0;
+}
+late_initcall(regulator_init_complete);
diff --git a/drivers/regulator/da903x.c b/drivers/regulator/da903x.c
index fe77730a7ed..72b15495183 100644
--- a/drivers/regulator/da903x.c
+++ b/drivers/regulator/da903x.c
@@ -471,7 +471,8 @@ static int __devinit da903x_regulator_probe(struct platform_device *pdev)
if (ri->desc.id == DA9030_ID_LDO1 || ri->desc.id == DA9030_ID_LDO15)
ri->desc.ops = &da9030_regulator_ldo1_15_ops;
- rdev = regulator_register(&ri->desc, &pdev->dev, ri);
+ rdev = regulator_register(&ri->desc, &pdev->dev,
+ pdev->dev.platform_data, ri);
if (IS_ERR(rdev)) {
dev_err(&pdev->dev, "failed to register regulator %s\n",
ri->desc.name);
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index d31db3e1491..23d554628a7 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -73,7 +73,8 @@ static int regulator_fixed_voltage_probe(struct platform_device *pdev)
drvdata->microvolts = config->microvolts;
- drvdata->dev = regulator_register(&drvdata->desc, drvdata);
+ drvdata->dev = regulator_register(&drvdata->desc, &pdev->dev,
+ config->init_data, drvdata);
if (IS_ERR(drvdata->dev)) {
ret = PTR_ERR(drvdata->dev);
goto err_name;
diff --git a/drivers/regulator/pcf50633-regulator.c b/drivers/regulator/pcf50633-regulator.c
index 4cc85ec6e12..cd761d85c8f 100644
--- a/drivers/regulator/pcf50633-regulator.c
+++ b/drivers/regulator/pcf50633-regulator.c
@@ -284,7 +284,8 @@ static int __devinit pcf50633_regulator_probe(struct platform_device *pdev)
/* Already set by core driver */
pcf = platform_get_drvdata(pdev);
- rdev = regulator_register(&regulators[pdev->id], &pdev->dev, pcf);
+ rdev = regulator_register(&regulators[pdev->id], &pdev->dev,
+ pdev->dev.platform_data, pcf);
if (IS_ERR(rdev))
return PTR_ERR(rdev);
diff --git a/drivers/regulator/twl4030-regulator.c b/drivers/regulator/twl4030-regulator.c
new file mode 100644
index 00000000000..e2032fb60b5
--- /dev/null
+++ b/drivers/regulator/twl4030-regulator.c
@@ -0,0 +1,500 @@
+/*
+ * twl4030-regulator.c -- support regulators in twl4030 family chips
+ *
+ * Copyright (C) 2008 David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/i2c/twl4030.h>
+
+
+/*
+ * The TWL4030/TW5030/TPS659x0 family chips include power management, a
+ * USB OTG transceiver, an RTC, ADC, PWM, and lots more. Some versions
+ * include an audio codec, battery charger, and more voltage regulators.
+ * These chips are often used in OMAP-based systems.
+ *
+ * This driver implements software-based resource control for various
+ * voltage regulators. This is usually augmented with state machine
+ * based control.
+ */
+
+struct twlreg_info {
+ /* start of regulator's PM_RECEIVER control register bank */
+ u8 base;
+
+ /* twl4030 resource ID, for resource control state machine */
+ u8 id;
+
+ /* voltage in mV = table[VSEL]; table_len must be a power-of-two */
+ u8 table_len;
+ const u16 *table;
+
+ /* chip constraints on regulator behavior */
+ u16 min_mV;
+
+ /* used by regulator core */
+ struct regulator_desc desc;
+};
+
+
+/* LDO control registers ... offset is from the base of its register bank.
+ * The first three registers of all power resource banks help hardware to
+ * manage the various resource groups.
+ */
+#define VREG_GRP 0
+#define VREG_TYPE 1
+#define VREG_REMAP 2
+#define VREG_DEDICATED 3 /* LDO control */
+
+
+static inline int
+twl4030reg_read(struct twlreg_info *info, unsigned offset)
+{
+ u8 value;
+ int status;
+
+ status = twl4030_i2c_read_u8(TWL4030_MODULE_PM_RECEIVER,
+ &value, info->base + offset);
+ return (status < 0) ? status : value;
+}
+
+static inline int
+twl4030reg_write(struct twlreg_info *info, unsigned offset, u8 value)
+{
+ return twl4030_i2c_write_u8(TWL4030_MODULE_PM_RECEIVER,
+ value, info->base + offset);
+}
+
+/*----------------------------------------------------------------------*/
+
+/* generic power resource operations, which work on all regulators */
+
+static int twl4030reg_grp(struct regulator_dev *rdev)
+{
+ return twl4030reg_read(rdev_get_drvdata(rdev), VREG_GRP);
+}
+
+/*
+ * Enable/disable regulators by joining/leaving the P1 (processor) group.
+ * We assume nobody else is updating the DEV_GRP registers.
+ */
+
+#define P3_GRP BIT(7) /* "peripherals" */
+#define P2_GRP BIT(6) /* secondary processor, modem, etc */
+#define P1_GRP BIT(5) /* CPU/Linux */
+
+static int twl4030reg_is_enabled(struct regulator_dev *rdev)
+{
+ int state = twl4030reg_grp(rdev);
+
+ if (state < 0)
+ return state;
+
+ return (state & P1_GRP) != 0;
+}
+
+static int twl4030reg_enable(struct regulator_dev *rdev)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ int grp;
+
+ grp = twl4030reg_read(info, VREG_GRP);
+ if (grp < 0)
+ return grp;
+
+ grp |= P1_GRP;
+ return twl4030reg_write(info, VREG_GRP, grp);
+}
+
+static int twl4030reg_disable(struct regulator_dev *rdev)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ int grp;
+
+ grp = twl4030reg_read(info, VREG_GRP);
+ if (grp < 0)
+ return grp;
+
+ grp &= ~P1_GRP;
+ return twl4030reg_write(info, VREG_GRP, grp);
+}
+
+static int twl4030reg_get_status(struct regulator_dev *rdev)
+{
+ int state = twl4030reg_grp(rdev);
+
+ if (state < 0)
+ return state;
+ state &= 0x0f;
+
+ /* assume state != WARM_RESET; we'd not be running... */
+ if (!state)
+ return REGULATOR_STATUS_OFF;
+ return (state & BIT(3))
+ ? REGULATOR_STATUS_NORMAL
+ : REGULATOR_STATUS_STANDBY;
+}
+
+static int twl4030reg_set_mode(struct regulator_dev *rdev, unsigned mode)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ unsigned message;
+ int status;
+
+ /* We can only set the mode through state machine commands... */
+ switch (mode) {
+ case REGULATOR_MODE_NORMAL:
+ message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_ACTIVE);
+ break;
+ case REGULATOR_MODE_STANDBY:
+ message = MSG_SINGULAR(DEV_GRP_P1, info->id, RES_STATE_SLEEP);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Ensure the resource is associated with some group */
+ status = twl4030reg_grp(rdev);
+ if (status < 0)
+ return status;
+ if (!(status & (P3_GRP | P2_GRP | P1_GRP)))
+ return -EACCES;
+
+ status = twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+ message >> 8, 0x15 /* PB_WORD_MSB */ );
+ if (status >= 0)
+ return status;
+
+ return twl4030_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
+ message, 0x16 /* PB_WORD_LSB */ );
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Support for adjustable-voltage LDOs uses a four bit (or less) voltage
+ * select field in its control register. We use tables indexed by VSEL
+ * to record voltages in milliVolts. (Accuracy is about three percent.)
+ *
+ * Note that VSEL values for VAUX2 changed in twl5030 and newer silicon;
+ * currently handled by listing two slightly different VAUX2 regulators,
+ * only one of which will be configured.
+ *
+ * VSEL values documented as "TI cannot support these values" are flagged
+ * in these tables as UNSUP() values; we normally won't assign them.
+ *
+ * VAUX3 at 3V is incorrectly listed in some TI manuals as unsupported.
+ * TI are revising the twl5030/tps659x0 specs to support that 3.0V setting.
+ */
+#ifdef CONFIG_TWL4030_ALLOW_UNSUPPORTED
+#define UNSUP_MASK 0x0000
+#else
+#define UNSUP_MASK 0x8000
+#endif
+
+#define UNSUP(x) (UNSUP_MASK | (x))
+#define IS_UNSUP(x) (UNSUP_MASK & (x))
+#define LDO_MV(x) (~UNSUP_MASK & (x))
+
+
+static const u16 VAUX1_VSEL_table[] = {
+ UNSUP(1500), UNSUP(1800), 2500, 2800,
+ 3000, 3000, 3000, 3000,
+};
+static const u16 VAUX2_4030_VSEL_table[] = {
+ UNSUP(1000), UNSUP(1000), UNSUP(1200), 1300,
+ 1500, 1800, UNSUP(1850), 2500,
+ UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
+ UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VAUX2_VSEL_table[] = {
+ 1700, 1700, 1900, 1300,
+ 1500, 1800, 2000, 2500,
+ 2100, 2800, 2200, 2300,
+ 2400, 2400, 2400, 2400,
+};
+static const u16 VAUX3_VSEL_table[] = {
+ 1500, 1800, 2500, 2800,
+ 3000, 3000, 3000, 3000,
+};
+static const u16 VAUX4_VSEL_table[] = {
+ 700, 1000, 1200, UNSUP(1300),
+ 1500, 1800, UNSUP(1850), 2500,
+ UNSUP(2600), 2800, UNSUP(2850), UNSUP(3000),
+ UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VMMC1_VSEL_table[] = {
+ 1850, 2850, 3000, 3150,
+};
+static const u16 VMMC2_VSEL_table[] = {
+ UNSUP(1000), UNSUP(1000), UNSUP(1200), UNSUP(1300),
+ UNSUP(1500), UNSUP(1800), 1850, UNSUP(2500),
+ 2600, 2800, 2850, 3000,
+ 3150, 3150, 3150, 3150,
+};
+static const u16 VPLL1_VSEL_table[] = {
+ 1000, 1200, 1300, 1800,
+ UNSUP(2800), UNSUP(3000), UNSUP(3000), UNSUP(3000),
+};
+static const u16 VPLL2_VSEL_table[] = {
+ 700, 1000, 1200, 1300,
+ UNSUP(1500), 1800, UNSUP(1850), UNSUP(2500),
+ UNSUP(2600), UNSUP(2800), UNSUP(2850), UNSUP(3000),
+ UNSUP(3150), UNSUP(3150), UNSUP(3150), UNSUP(3150),
+};
+static const u16 VSIM_VSEL_table[] = {
+ UNSUP(1000), UNSUP(1200), UNSUP(1300), 1800,
+ 2800, 3000, 3000, 3000,
+};
+static const u16 VDAC_VSEL_table[] = {
+ 1200, 1300, 1800, 1800,
+};
+
+
+static int twl4030ldo_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ int mV = info->table[index];
+
+ return IS_UNSUP(mV) ? 0 : (LDO_MV(mV) * 1000);
+}
+
+static int
+twl4030ldo_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ int vsel;
+
+ for (vsel = 0; vsel < info->table_len; vsel++) {
+ int mV = info->table[vsel];
+ int uV;
+
+ if (IS_UNSUP(mV))
+ continue;
+ uV = LDO_MV(mV) * 1000;
+
+ /* REVISIT for VAUX2, first match may not be best/lowest */
+
+ /* use the first in-range value */
+ if (min_uV <= uV && uV <= max_uV)
+ return twl4030reg_write(info, VREG_DEDICATED, vsel);
+ }
+
+ return -EDOM;
+}
+
+static int twl4030ldo_get_voltage(struct regulator_dev *rdev)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+ int vsel = twl4030reg_read(info, VREG_DEDICATED);
+
+ if (vsel < 0)
+ return vsel;
+
+ vsel &= info->table_len - 1;
+ return LDO_MV(info->table[vsel]) * 1000;
+}
+
+static struct regulator_ops twl4030ldo_ops = {
+ .list_voltage = twl4030ldo_list_voltage,
+
+ .set_voltage = twl4030ldo_set_voltage,
+ .get_voltage = twl4030ldo_get_voltage,
+
+ .enable = twl4030reg_enable,
+ .disable = twl4030reg_disable,
+ .is_enabled = twl4030reg_is_enabled,
+
+ .set_mode = twl4030reg_set_mode,
+
+ .get_status = twl4030reg_get_status,
+};
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Fixed voltage LDOs don't have a VSEL field to update.
+ */
+static int twl4030fixed_list_voltage(struct regulator_dev *rdev, unsigned index)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+
+ return info->min_mV * 1000;
+}
+
+static int twl4030fixed_get_voltage(struct regulator_dev *rdev)
+{
+ struct twlreg_info *info = rdev_get_drvdata(rdev);
+
+ return info->min_mV * 1000;
+}
+
+static struct regulator_ops twl4030fixed_ops = {
+ .list_voltage = twl4030fixed_list_voltage,
+
+ .get_voltage = twl4030fixed_get_voltage,
+
+ .enable = twl4030reg_enable,
+ .disable = twl4030reg_disable,
+ .is_enabled = twl4030reg_is_enabled,
+
+ .set_mode = twl4030reg_set_mode,
+
+ .get_status = twl4030reg_get_status,
+};
+
+/*----------------------------------------------------------------------*/
+
+#define TWL_ADJUSTABLE_LDO(label, offset, num) { \
+ .base = offset, \
+ .id = num, \
+ .table_len = ARRAY_SIZE(label##_VSEL_table), \
+ .table = label##_VSEL_table, \
+ .desc = { \
+ .name = #label, \
+ .id = TWL4030_REG_##label, \
+ .n_voltages = ARRAY_SIZE(label##_VSEL_table), \
+ .ops = &twl4030ldo_ops, \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ }, \
+ }
+
+#define TWL_FIXED_LDO(label, offset, mVolts, num) { \
+ .base = offset, \
+ .id = num, \
+ .min_mV = mVolts, \
+ .desc = { \
+ .name = #label, \
+ .id = TWL4030_REG_##label, \
+ .n_voltages = 1, \
+ .ops = &twl4030fixed_ops, \
+ .type = REGULATOR_VOLTAGE, \
+ .owner = THIS_MODULE, \
+ }, \
+ }
+
+/*
+ * We list regulators here if systems need some level of
+ * software control over them after boot.
+ */
+static struct twlreg_info twl4030_regs[] = {
+ TWL_ADJUSTABLE_LDO(VAUX1, 0x17, 1),
+ TWL_ADJUSTABLE_LDO(VAUX2_4030, 0x1b, 2),
+ TWL_ADJUSTABLE_LDO(VAUX2, 0x1b, 2),
+ TWL_ADJUSTABLE_LDO(VAUX3, 0x1f, 3),
+ TWL_ADJUSTABLE_LDO(VAUX4, 0x23, 4),
+ TWL_ADJUSTABLE_LDO(VMMC1, 0x27, 5),
+ TWL_ADJUSTABLE_LDO(VMMC2, 0x2b, 6),
+ /*
+ TWL_ADJUSTABLE_LDO(VPLL1, 0x2f, 7),
+ */
+ TWL_ADJUSTABLE_LDO(VPLL2, 0x33, 8),
+ TWL_ADJUSTABLE_LDO(VSIM, 0x37, 9),
+ TWL_ADJUSTABLE_LDO(VDAC, 0x3b, 10),
+ /*
+ TWL_ADJUSTABLE_LDO(VINTANA1, 0x3f, 11),
+ TWL_ADJUSTABLE_LDO(VINTANA2, 0x43, 12),
+ TWL_ADJUSTABLE_LDO(VINTDIG, 0x47, 13),
+ TWL_SMPS(VIO, 0x4b, 14),
+ TWL_SMPS(VDD1, 0x55, 15),
+ TWL_SMPS(VDD2, 0x63, 16),
+ */
+ TWL_FIXED_LDO(VUSB1V5, 0x71, 1500, 17),
+ TWL_FIXED_LDO(VUSB1V8, 0x74, 1800, 18),
+ TWL_FIXED_LDO(VUSB3V1, 0x77, 3100, 19),
+ /* VUSBCP is managed *only* by the USB subchip */
+};
+
+static int twl4030reg_probe(struct platform_device *pdev)
+{
+ int i;
+ struct twlreg_info *info;
+ struct regulator_init_data *initdata;
+ struct regulation_constraints *c;
+ struct regulator_dev *rdev;
+
+ for (i = 0, info = NULL; i < ARRAY_SIZE(twl4030_regs); i++) {
+ if (twl4030_regs[i].desc.id != pdev->id)
+ continue;
+ info = twl4030_regs + i;
+ break;
+ }
+ if (!info)
+ return -ENODEV;
+
+ initdata = pdev->dev.platform_data;
+ if (!initdata)
+ return -EINVAL;
+
+ /* Constrain board-specific capabilities according to what
+ * this driver and the chip itself can actually do.
+ */
+ c = &initdata->constraints;
+ c->valid_modes_mask &= REGULATOR_MODE_NORMAL | REGULATOR_MODE_STANDBY;
+ c->valid_ops_mask &= REGULATOR_CHANGE_VOLTAGE
+ | REGULATOR_CHANGE_MODE
+ | REGULATOR_CHANGE_STATUS;
+
+ rdev = regulator_register(&info->desc, &pdev->dev, initdata, info);
+ if (IS_ERR(rdev)) {
+ dev_err(&pdev->dev, "can't register %s, %ld\n",
+ info->desc.name, PTR_ERR(rdev));
+ return PTR_ERR(rdev);
+ }
+ platform_set_drvdata(pdev, rdev);
+
+ /* NOTE: many regulators support short-circuit IRQs (presentable
+ * as REGULATOR_OVER_CURRENT notifications?) configured via:
+ * - SC_CONFIG
+ * - SC_DETECT1 (vintana2, vmmc1/2, vaux1/2/3/4)
+ * - SC_DETECT2 (vusb, vdac, vio, vdd1/2, vpll2)
+ * - IT_CONFIG
+ */
+
+ return 0;
+}
+
+static int __devexit twl4030reg_remove(struct platform_device *pdev)
+{
+ regulator_unregister(platform_get_drvdata(pdev));
+ return 0;
+}
+
+MODULE_ALIAS("platform:twl4030_reg");
+
+static struct platform_driver twl4030reg_driver = {
+ .probe = twl4030reg_probe,
+ .remove = __devexit_p(twl4030reg_remove),
+ /* NOTE: short name, to work around driver model truncation of
+ * "twl4030_regulator.12" (and friends) to "twl4030_regulator.1".
+ */
+ .driver.name = "twl4030_reg",
+ .driver.owner = THIS_MODULE,
+};
+
+static int __init twl4030reg_init(void)
+{
+ return platform_driver_register(&twl4030reg_driver);
+}
+subsys_initcall(twl4030reg_init);
+
+static void __exit twl4030reg_exit(void)
+{
+ platform_driver_unregister(&twl4030reg_driver);
+}
+module_exit(twl4030reg_exit)
+
+MODULE_DESCRIPTION("TWL4030 regulator driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/virtual.c b/drivers/regulator/virtual.c
index 5ddb464b1c3..3d08348584e 100644
--- a/drivers/regulator/virtual.c
+++ b/drivers/regulator/virtual.c
@@ -226,13 +226,17 @@ static ssize_t set_mode(struct device *dev, struct device_attribute *attr,
unsigned int mode;
int ret;
- if (strncmp(buf, "fast", strlen("fast")) == 0)
+ /*
+ * sysfs_streq() doesn't need the \n's, but we add them so the strings
+ * will be shared with show_mode(), above.
+ */
+ if (sysfs_streq(buf, "fast\n") == 0)
mode = REGULATOR_MODE_FAST;
- else if (strncmp(buf, "normal", strlen("normal")) == 0)
+ else if (sysfs_streq(buf, "normal\n") == 0)
mode = REGULATOR_MODE_NORMAL;
- else if (strncmp(buf, "idle", strlen("idle")) == 0)
+ else if (sysfs_streq(buf, "idle\n") == 0)
mode = REGULATOR_MODE_IDLE;
- else if (strncmp(buf, "standby", strlen("standby")) == 0)
+ else if (sysfs_streq(buf, "standby\n") == 0)
mode = REGULATOR_MODE_STANDBY;
else {
dev_err(dev, "Configuring invalid mode\n");
@@ -256,7 +260,7 @@ static DEVICE_ATTR(min_microamps, 0666, show_min_uA, set_min_uA);
static DEVICE_ATTR(max_microamps, 0666, show_max_uA, set_max_uA);
static DEVICE_ATTR(mode, 0666, show_mode, set_mode);
-struct device_attribute *attributes[] = {
+static struct device_attribute *attributes[] = {
&dev_attr_min_microvolts,
&dev_attr_max_microvolts,
&dev_attr_min_microamps,
diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c
index 5056e23e441..771eca1066b 100644
--- a/drivers/regulator/wm8350-regulator.c
+++ b/drivers/regulator/wm8350-regulator.c
@@ -24,6 +24,9 @@
#include <linux/regulator/driver.h>
#include <linux/regulator/machine.h>
+/* Maximum value possible for VSEL */
+#define WM8350_DCDC_MAX_VSEL 0x66
+
/* Microamps */
static const int isink_cur[] = {
4,
@@ -385,6 +388,14 @@ static int wm8350_dcdc_get_voltage(struct regulator_dev *rdev)
return wm8350_dcdc_val_to_mvolts(val) * 1000;
}
+static int wm8350_dcdc_list_voltage(struct regulator_dev *rdev,
+ unsigned selector)
+{
+ if (selector > WM8350_DCDC_MAX_VSEL)
+ return -EINVAL;
+ return wm8350_dcdc_val_to_mvolts(selector) * 1000;
+}
+
static int wm8350_dcdc_set_suspend_voltage(struct regulator_dev *rdev, int uV)
{
struct wm8350 *wm8350 = rdev_get_drvdata(rdev);
@@ -775,6 +786,14 @@ static int wm8350_ldo_get_voltage(struct regulator_dev *rdev)
return wm8350_ldo_val_to_mvolts(val) * 1000;
}
+static int wm8350_ldo_list_voltage(struct regulator_dev *rdev,
+ unsigned selector)
+{
+ if (selector > WM8350_LDO1_VSEL_MASK)
+ return -EINVAL;
+ return wm8350_ldo_val_to_mvolts(selector) * 1000;
+}
+
int wm8350_dcdc_set_slot(struct wm8350 *wm8350, int dcdc, u16 start,
u16 stop, u16 fault)
{
@@ -1031,18 +1050,30 @@ static unsigned int wm8350_dcdc_get_mode(struct regulator_dev *rdev)
int dcdc = rdev_get_id(rdev);
u16 mask, sleep, active, force;
int mode = REGULATOR_MODE_NORMAL;
+ int reg;
- if (dcdc < WM8350_DCDC_1 || dcdc > WM8350_DCDC_6)
- return -EINVAL;
-
- if (dcdc == WM8350_DCDC_2 || dcdc == WM8350_DCDC_5)
+ switch (dcdc) {
+ case WM8350_DCDC_1:
+ reg = WM8350_DCDC1_FORCE_PWM;
+ break;
+ case WM8350_DCDC_3:
+ reg = WM8350_DCDC3_FORCE_PWM;
+ break;
+ case WM8350_DCDC_4:
+ reg = WM8350_DCDC4_FORCE_PWM;
+ break;
+ case WM8350_DCDC_6:
+ reg = WM8350_DCDC6_FORCE_PWM;
+ break;
+ default:
return -EINVAL;
+ }
mask = 1 << (dcdc - WM8350_DCDC_1);
active = wm8350_reg_read(wm8350, WM8350_DCDC_ACTIVE_OPTIONS) & mask;
+ force = wm8350_reg_read(wm8350, reg) & WM8350_DCDC1_FORCE_PWM_ENA;
sleep = wm8350_reg_read(wm8350, WM8350_DCDC_SLEEP_OPTIONS) & mask;
- force = wm8350_reg_read(wm8350, WM8350_DCDC1_FORCE_PWM)
- & WM8350_DCDC1_FORCE_PWM_ENA;
+
dev_dbg(wm8350->dev, "mask %x active %x sleep %x force %x",
mask, active, sleep, force);
@@ -1150,6 +1181,7 @@ static int wm8350_ldo_is_enabled(struct regulator_dev *rdev)
static struct regulator_ops wm8350_dcdc_ops = {
.set_voltage = wm8350_dcdc_set_voltage,
.get_voltage = wm8350_dcdc_get_voltage,
+ .list_voltage = wm8350_dcdc_list_voltage,
.enable = wm8350_dcdc_enable,
.disable = wm8350_dcdc_disable,
.get_mode = wm8350_dcdc_get_mode,
@@ -1173,6 +1205,7 @@ static struct regulator_ops wm8350_dcdc2_5_ops = {
static struct regulator_ops wm8350_ldo_ops = {
.set_voltage = wm8350_ldo_set_voltage,
.get_voltage = wm8350_ldo_get_voltage,
+ .list_voltage = wm8350_ldo_list_voltage,
.enable = wm8350_ldo_enable,
.disable = wm8350_ldo_disable,
.is_enabled = wm8350_ldo_is_enabled,
@@ -1197,6 +1230,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_dcdc_ops,
.irq = WM8350_IRQ_UV_DC1,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_DCDC_MAX_VSEL + 1,
.owner = THIS_MODULE,
},
{
@@ -1213,6 +1247,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_dcdc_ops,
.irq = WM8350_IRQ_UV_DC3,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_DCDC_MAX_VSEL + 1,
.owner = THIS_MODULE,
},
{
@@ -1221,6 +1256,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_dcdc_ops,
.irq = WM8350_IRQ_UV_DC4,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_DCDC_MAX_VSEL + 1,
.owner = THIS_MODULE,
},
{
@@ -1237,6 +1273,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_dcdc_ops,
.irq = WM8350_IRQ_UV_DC6,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_DCDC_MAX_VSEL + 1,
.owner = THIS_MODULE,
},
{
@@ -1245,6 +1282,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_ldo_ops,
.irq = WM8350_IRQ_UV_LDO1,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_LDO1_VSEL_MASK + 1,
.owner = THIS_MODULE,
},
{
@@ -1253,6 +1291,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_ldo_ops,
.irq = WM8350_IRQ_UV_LDO2,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_LDO2_VSEL_MASK + 1,
.owner = THIS_MODULE,
},
{
@@ -1261,6 +1300,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_ldo_ops,
.irq = WM8350_IRQ_UV_LDO3,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_LDO3_VSEL_MASK + 1,
.owner = THIS_MODULE,
},
{
@@ -1269,6 +1309,7 @@ static struct regulator_desc wm8350_reg[NUM_WM8350_REGULATORS] = {
.ops = &wm8350_ldo_ops,
.irq = WM8350_IRQ_UV_LDO4,
.type = REGULATOR_VOLTAGE,
+ .n_voltages = WM8350_LDO4_VSEL_MASK + 1,
.owner = THIS_MODULE,
},
{
@@ -1293,6 +1334,7 @@ static void pmic_uv_handler(struct wm8350 *wm8350, int irq, void *data)
{
struct regulator_dev *rdev = (struct regulator_dev *)data;
+ mutex_lock(&rdev->mutex);
if (irq == WM8350_IRQ_CS1 || irq == WM8350_IRQ_CS2)
regulator_notifier_call_chain(rdev,
REGULATOR_EVENT_REGULATION_OUT,
@@ -1301,6 +1343,7 @@ static void pmic_uv_handler(struct wm8350 *wm8350, int irq, void *data)
regulator_notifier_call_chain(rdev,
REGULATOR_EVENT_UNDER_VOLTAGE,
wm8350);
+ mutex_unlock(&rdev->mutex);
}
static int wm8350_regulator_probe(struct platform_device *pdev)
@@ -1333,9 +1376,9 @@ static int wm8350_regulator_probe(struct platform_device *pdev)
break;
}
-
/* register regulator */
rdev = regulator_register(&wm8350_reg[pdev->id], &pdev->dev,
+ pdev->dev.platform_data,
dev_get_drvdata(&pdev->dev));
if (IS_ERR(rdev)) {
dev_err(&pdev->dev, "failed to register %s\n",
diff --git a/drivers/regulator/wm8400-regulator.c b/drivers/regulator/wm8400-regulator.c
index 56e23d44ba5..15742602907 100644
--- a/drivers/regulator/wm8400-regulator.c
+++ b/drivers/regulator/wm8400-regulator.c
@@ -43,6 +43,18 @@ static int wm8400_ldo_disable(struct regulator_dev *dev)
WM8400_LDO1_ENA, 0);
}
+static int wm8400_ldo_list_voltage(struct regulator_dev *dev,
+ unsigned selector)
+{
+ if (selector > WM8400_LDO1_VSEL_MASK)
+ return -EINVAL;
+
+ if (selector < 15)
+ return 900000 + (selector * 50000);
+ else
+ return 1600000 + ((selector - 14) * 100000);
+}
+
static int wm8400_ldo_get_voltage(struct regulator_dev *dev)
{
struct wm8400 *wm8400 = rdev_get_drvdata(dev);
@@ -51,10 +63,7 @@ static int wm8400_ldo_get_voltage(struct regulator_dev *dev)
val = wm8400_reg_read(wm8400, WM8400_LDO1_CONTROL + rdev_get_id(dev));
val &= WM8400_LDO1_VSEL_MASK;
- if (val < 15)
- return 900000 + (val * 50000);
- else
- return 1600000 + ((val - 14) * 100000);
+ return wm8400_ldo_list_voltage(dev, val);
}
static int wm8400_ldo_set_voltage(struct regulator_dev *dev,
@@ -92,6 +101,7 @@ static struct regulator_ops wm8400_ldo_ops = {
.is_enabled = wm8400_ldo_is_enabled,
.enable = wm8400_ldo_enable,
.disable = wm8400_ldo_disable,
+ .list_voltage = wm8400_ldo_list_voltage,
.get_voltage = wm8400_ldo_get_voltage,
.set_voltage = wm8400_ldo_set_voltage,
};
@@ -124,6 +134,15 @@ static int wm8400_dcdc_disable(struct regulator_dev *dev)
WM8400_DC1_ENA, 0);
}
+static int wm8400_dcdc_list_voltage(struct regulator_dev *dev,
+ unsigned selector)
+{
+ if (selector > WM8400_DC1_VSEL_MASK)
+ return -EINVAL;
+
+ return 850000 + (selector * 25000);
+}
+
static int wm8400_dcdc_get_voltage(struct regulator_dev *dev)
{
struct wm8400 *wm8400 = rdev_get_drvdata(dev);
@@ -237,6 +256,7 @@ static struct regulator_ops wm8400_dcdc_ops = {
.is_enabled = wm8400_dcdc_is_enabled,
.enable = wm8400_dcdc_enable,
.disable = wm8400_dcdc_disable,
+ .list_voltage = wm8400_dcdc_list_voltage,
.get_voltage = wm8400_dcdc_get_voltage,
.set_voltage = wm8400_dcdc_set_voltage,
.get_mode = wm8400_dcdc_get_mode,
@@ -249,6 +269,7 @@ static struct regulator_desc regulators[] = {
.name = "LDO1",
.id = WM8400_LDO1,
.ops = &wm8400_ldo_ops,
+ .n_voltages = WM8400_LDO1_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -256,6 +277,7 @@ static struct regulator_desc regulators[] = {
.name = "LDO2",
.id = WM8400_LDO2,
.ops = &wm8400_ldo_ops,
+ .n_voltages = WM8400_LDO2_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -263,6 +285,7 @@ static struct regulator_desc regulators[] = {
.name = "LDO3",
.id = WM8400_LDO3,
.ops = &wm8400_ldo_ops,
+ .n_voltages = WM8400_LDO3_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -270,6 +293,7 @@ static struct regulator_desc regulators[] = {
.name = "LDO4",
.id = WM8400_LDO4,
.ops = &wm8400_ldo_ops,
+ .n_voltages = WM8400_LDO4_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -277,6 +301,7 @@ static struct regulator_desc regulators[] = {
.name = "DCDC1",
.id = WM8400_DCDC1,
.ops = &wm8400_dcdc_ops,
+ .n_voltages = WM8400_DC1_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -284,6 +309,7 @@ static struct regulator_desc regulators[] = {
.name = "DCDC2",
.id = WM8400_DCDC2,
.ops = &wm8400_dcdc_ops,
+ .n_voltages = WM8400_DC2_VSEL_MASK + 1,
.type = REGULATOR_VOLTAGE,
.owner = THIS_MODULE,
},
@@ -294,7 +320,7 @@ static int __devinit wm8400_regulator_probe(struct platform_device *pdev)
struct regulator_dev *rdev;
rdev = regulator_register(&regulators[pdev->id], &pdev->dev,
- pdev->dev.driver_data);
+ pdev->dev.platform_data, pdev->dev.driver_data);
if (IS_ERR(rdev))
return PTR_ERR(rdev);
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 56002f7d26b..ffe34a12f44 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -688,22 +688,16 @@ config RTC_DRV_RS5C313
help
If you say yes here you get support for the Ricoh RS5C313 RTC chips.
-config RTC_DRV_PARISC
- tristate "PA-RISC firmware RTC support"
- depends on PARISC
- help
- Say Y or M here to enable RTC support on PA-RISC systems using
- firmware calls. If you do not know what you are doing, you should
+config RTC_DRV_GENERIC
+ tristate "Generic RTC support"
+ # Please consider writing a new RTC driver instead of using the generic
+ # RTC abstraction
+ depends on PARISC || M68K || PPC
+ help
+ Say Y or M here to enable RTC support on systems using the generic
+ RTC abstraction. If you do not know what you are doing, you should
just say Y.
-config RTC_DRV_PPC
- tristate "PowerPC machine dependent RTC support"
- depends on PPC
- help
- The PowerPC kernel has machine-specific functions for accessing
- the RTC. This exposes that functionality through the generic RTC
- class.
-
config RTC_DRV_PXA
tristate "PXA27x/PXA3xx"
depends on ARCH_PXA
@@ -747,4 +741,13 @@ config RTC_DRV_MV
This driver can also be built as a module. If so, the module
will be called rtc-mv.
+config RTC_DRV_PS3
+ tristate "PS3 RTC"
+ depends on PPC_PS3
+ help
+ If you say yes here you will get support for the RTC on PS3.
+
+ This driver can also be built as a module. If so, the module
+ will be called rtc-ps3.
+
endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index e7b09986d26..6c0639a14f0 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -56,8 +56,7 @@ obj-$(CONFIG_RTC_DRV_PCF8563) += rtc-pcf8563.o
obj-$(CONFIG_RTC_DRV_PCF8583) += rtc-pcf8583.o
obj-$(CONFIG_RTC_DRV_PL030) += rtc-pl030.o
obj-$(CONFIG_RTC_DRV_PL031) += rtc-pl031.o
-obj-$(CONFIG_RTC_DRV_PARISC) += rtc-parisc.o
-obj-$(CONFIG_RTC_DRV_PPC) += rtc-ppc.o
+obj-$(CONFIG_RTC_DRV_GENERIC) += rtc-generic.o
obj-$(CONFIG_RTC_DRV_PXA) += rtc-pxa.o
obj-$(CONFIG_RTC_DRV_R9701) += rtc-r9701.o
obj-$(CONFIG_RTC_DRV_RS5C313) += rtc-rs5c313.o
@@ -77,3 +76,4 @@ obj-$(CONFIG_RTC_DRV_VR41XX) += rtc-vr41xx.o
obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm8350.o
obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o
obj-$(CONFIG_RTC_DRV_PCF50633) += rtc-pcf50633.o
+obj-$(CONFIG_RTC_DRV_PS3) += rtc-ps3.o
diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c
new file mode 100644
index 00000000000..98322004ad2
--- /dev/null
+++ b/drivers/rtc/rtc-generic.c
@@ -0,0 +1,84 @@
+/* rtc-generic: RTC driver using the generic RTC abstraction
+ *
+ * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/rtc.h>
+
+static int generic_get_time(struct device *dev, struct rtc_time *tm)
+{
+ unsigned int ret = get_rtc_time(tm);
+
+ if (ret & RTC_BATT_BAD)
+ return -EOPNOTSUPP;
+
+ return rtc_valid_tm(tm);
+}
+
+static int generic_set_time(struct device *dev, struct rtc_time *tm)
+{
+ if (set_rtc_time(tm) < 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static const struct rtc_class_ops generic_rtc_ops = {
+ .read_time = generic_get_time,
+ .set_time = generic_set_time,
+};
+
+static int __init generic_rtc_probe(struct platform_device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = rtc_device_register("rtc-generic", &dev->dev, &generic_rtc_ops,
+ THIS_MODULE);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ platform_set_drvdata(dev, rtc);
+
+ return 0;
+}
+
+static int __exit generic_rtc_remove(struct platform_device *dev)
+{
+ struct rtc_device *rtc = platform_get_drvdata(dev);
+
+ rtc_device_unregister(rtc);
+
+ return 0;
+}
+
+static struct platform_driver generic_rtc_driver = {
+ .driver = {
+ .name = "rtc-generic",
+ .owner = THIS_MODULE,
+ },
+ .remove = __exit_p(generic_rtc_remove),
+};
+
+static int __init generic_rtc_init(void)
+{
+ return platform_driver_probe(&generic_rtc_driver, generic_rtc_probe);
+}
+
+static void __exit generic_rtc_fini(void)
+{
+ platform_driver_unregister(&generic_rtc_driver);
+}
+
+module_init(generic_rtc_init);
+module_exit(generic_rtc_fini);
+
+MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Generic RTC driver");
+MODULE_ALIAS("platform:rtc-generic");
diff --git a/drivers/rtc/rtc-parisc.c b/drivers/rtc/rtc-parisc.c
deleted file mode 100644
index b966f56da97..00000000000
--- a/drivers/rtc/rtc-parisc.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* rtc-parisc: RTC for HP PA-RISC firmware
- *
- * Copyright (C) 2008 Kyle McMartin <kyle@mcmartin.ca>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/platform_device.h>
-#include <linux/rtc.h>
-
-#include <asm/rtc.h>
-
-static int parisc_get_time(struct device *dev, struct rtc_time *tm)
-{
- unsigned long ret;
-
- ret = get_rtc_time(tm);
-
- if (ret & RTC_BATT_BAD)
- return -EOPNOTSUPP;
-
- return rtc_valid_tm(tm);
-}
-
-static int parisc_set_time(struct device *dev, struct rtc_time *tm)
-{
- if (set_rtc_time(tm) < 0)
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-static const struct rtc_class_ops parisc_rtc_ops = {
- .read_time = parisc_get_time,
- .set_time = parisc_set_time,
-};
-
-static int __init parisc_rtc_probe(struct platform_device *dev)
-{
- struct rtc_device *rtc;
-
- rtc = rtc_device_register("rtc-parisc", &dev->dev, &parisc_rtc_ops,
- THIS_MODULE);
- if (IS_ERR(rtc))
- return PTR_ERR(rtc);
-
- platform_set_drvdata(dev, rtc);
-
- return 0;
-}
-
-static int __exit parisc_rtc_remove(struct platform_device *dev)
-{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
- rtc_device_unregister(rtc);
-
- return 0;
-}
-
-static struct platform_driver parisc_rtc_driver = {
- .driver = {
- .name = "rtc-parisc",
- .owner = THIS_MODULE,
- },
- .probe = parisc_rtc_probe,
- .remove = __devexit_p(parisc_rtc_remove),
-};
-
-static int __init parisc_rtc_init(void)
-{
- return platform_driver_probe(&parisc_rtc_driver, parisc_rtc_probe);
-}
-
-static void __exit parisc_rtc_fini(void)
-{
- platform_driver_unregister(&parisc_rtc_driver);
-}
-
-module_init(parisc_rtc_init);
-module_exit(parisc_rtc_fini);
-
-MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("HP PA-RISC RTC driver");
diff --git a/drivers/rtc/rtc-ppc.c b/drivers/rtc/rtc-ppc.c
deleted file mode 100644
index c8e97e25ef7..00000000000
--- a/drivers/rtc/rtc-ppc.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * RTC driver for ppc_md RTC functions
- *
- * © 2007 Red Hat, Inc.
- *
- * Author: David Woodhouse <dwmw2@infradead.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/rtc.h>
-#include <linux/platform_device.h>
-#include <asm/machdep.h>
-
-static int ppc_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
- ppc_md.get_rtc_time(tm);
- return 0;
-}
-
-static int ppc_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
- return ppc_md.set_rtc_time(tm);
-}
-
-static const struct rtc_class_ops ppc_rtc_ops = {
- .set_time = ppc_rtc_set_time,
- .read_time = ppc_rtc_read_time,
-};
-
-static struct rtc_device *rtc;
-static struct platform_device *ppc_rtc_pdev;
-
-static int __init ppc_rtc_init(void)
-{
- if (!ppc_md.get_rtc_time || !ppc_md.set_rtc_time)
- return -ENODEV;
-
- ppc_rtc_pdev = platform_device_register_simple("ppc-rtc", 0, NULL, 0);
- if (IS_ERR(ppc_rtc_pdev))
- return PTR_ERR(ppc_rtc_pdev);
-
- rtc = rtc_device_register("ppc_md", &ppc_rtc_pdev->dev,
- &ppc_rtc_ops, THIS_MODULE);
- if (IS_ERR(rtc)) {
- platform_device_unregister(ppc_rtc_pdev);
- return PTR_ERR(rtc);
- }
-
- return 0;
-}
-
-static void __exit ppc_rtc_exit(void)
-{
- rtc_device_unregister(rtc);
- platform_device_unregister(ppc_rtc_pdev);
-}
-
-module_init(ppc_rtc_init);
-module_exit(ppc_rtc_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
-MODULE_DESCRIPTION("Generic RTC class driver for PowerPC");
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
new file mode 100644
index 00000000000..968133ce1ee
--- /dev/null
+++ b/drivers/rtc/rtc-ps3.c
@@ -0,0 +1,104 @@
+/*
+ * PS3 RTC Driver
+ *
+ * Copyright 2009 Sony Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+
+static u64 read_rtc(void)
+{
+ int result;
+ u64 rtc_val;
+ u64 tb_val;
+
+ result = lv1_get_rtc(&rtc_val, &tb_val);
+ BUG_ON(result);
+
+ return rtc_val;
+}
+
+static int ps3_get_time(struct device *dev, struct rtc_time *tm)
+{
+ rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
+ return rtc_valid_tm(tm);
+}
+
+static int ps3_set_time(struct device *dev, struct rtc_time *tm)
+{
+ unsigned long now;
+
+ rtc_tm_to_time(tm, &now);
+ ps3_os_area_set_rtc_diff(now - read_rtc());
+ return 0;
+}
+
+static const struct rtc_class_ops ps3_rtc_ops = {
+ .read_time = ps3_get_time,
+ .set_time = ps3_set_time,
+};
+
+static int __init ps3_rtc_probe(struct platform_device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = rtc_device_register("rtc-ps3", &dev->dev, &ps3_rtc_ops,
+ THIS_MODULE);
+ if (IS_ERR(rtc))
+ return PTR_ERR(rtc);
+
+ platform_set_drvdata(dev, rtc);
+ return 0;
+}
+
+static int __exit ps3_rtc_remove(struct platform_device *dev)
+{
+ rtc_device_unregister(platform_get_drvdata(dev));
+ return 0;
+}
+
+static struct platform_driver ps3_rtc_driver = {
+ .driver = {
+ .name = "rtc-ps3",
+ .owner = THIS_MODULE,
+ },
+ .remove = __exit_p(ps3_rtc_remove),
+};
+
+static int __init ps3_rtc_init(void)
+{
+ return platform_driver_probe(&ps3_rtc_driver, ps3_rtc_probe);
+}
+
+static void __exit ps3_rtc_fini(void)
+{
+ platform_driver_unregister(&ps3_rtc_driver);
+}
+
+module_init(ps3_rtc_init);
+module_exit(ps3_rtc_fini);
+
+MODULE_AUTHOR("Sony Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ps3 RTC driver");
+MODULE_ALIAS("platform:rtc-ps3");
diff --git a/drivers/serial/mcf.c b/drivers/serial/mcf.c
index 56841fe5f48..0eefb07beba 100644
--- a/drivers/serial/mcf.c
+++ b/drivers/serial/mcf.c
@@ -513,7 +513,7 @@ static int __init mcf_console_setup(struct console *co, char *options)
int parity = 'n';
int flow = 'n';
- if ((co->index >= 0) && (co->index <= MCF_MAXPORTS))
+ if ((co->index < 0) || (co->index >= MCF_MAXPORTS))
co->index = 0;
port = &mcf_ports[co->index].port;
if (port->membase == 0)
diff --git a/drivers/usb/storage/isd200.c b/drivers/usb/storage/isd200.c
index 882c57b399f..fdba2f69d4c 100644
--- a/drivers/usb/storage/isd200.c
+++ b/drivers/usb/storage/isd200.c
@@ -46,6 +46,7 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/ata.h>
#include <linux/hdreg.h>
#include <linux/scatterlist.h>
@@ -328,7 +329,7 @@ struct isd200_config {
struct isd200_info {
struct inquiry_data InquiryData;
- struct hd_driveid *id;
+ u16 *id;
struct isd200_config ConfigData;
unsigned char *RegsBuf;
unsigned char ATARegs[8];
@@ -419,19 +420,19 @@ static void isd200_build_sense(struct us_data *us, struct scsi_cmnd *srb)
buf->Flags = UNIT_ATTENTION;
buf->AdditionalSenseCode = 0;
buf->AdditionalSenseCodeQualifier = 0;
- } else if(error & MCR_ERR) {
+ } else if (error & ATA_MCR) {
buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
buf->AdditionalSenseLength = 0xb;
buf->Flags = UNIT_ATTENTION;
buf->AdditionalSenseCode = 0;
buf->AdditionalSenseCodeQualifier = 0;
- } else if(error & TRK0_ERR) {
+ } else if (error & ATA_TRK0NF) {
buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
buf->AdditionalSenseLength = 0xb;
buf->Flags = NOT_READY;
buf->AdditionalSenseCode = 0;
buf->AdditionalSenseCodeQualifier = 0;
- } else if(error & ECC_ERR) {
+ } else if (error & ATA_UNC) {
buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID;
buf->AdditionalSenseLength = 0xb;
buf->Flags = DATA_PROTECT;
@@ -547,16 +548,16 @@ static int isd200_action( struct us_data *us, int action,
ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_5;
ata.generic.RegisterSelect = REG_DEVICE_HEAD | REG_COMMAND;
ata.write.DeviceHeadByte = info->DeviceHead;
- ata.write.CommandByte = WIN_SRST;
+ ata.write.CommandByte = ATA_CMD_DEV_RESET;
isd200_set_srb(info, DMA_NONE, NULL, 0);
break;
case ACTION_IDENTIFY:
US_DEBUGP(" isd200_action(IDENTIFY)\n");
ata.generic.RegisterSelect = REG_COMMAND;
- ata.write.CommandByte = WIN_IDENTIFY;
+ ata.write.CommandByte = ATA_CMD_ID_ATA;
isd200_set_srb(info, DMA_FROM_DEVICE, info->id,
- sizeof(struct hd_driveid));
+ ATA_ID_WORDS * 2);
break;
default:
@@ -944,22 +945,22 @@ static int isd200_try_enum(struct us_data *us, unsigned char master_slave,
break;
if (!detect) {
- if (regs[ATA_REG_STATUS_OFFSET] & BUSY_STAT) {
+ if (regs[ATA_REG_STATUS_OFFSET] & ATA_BUSY) {
US_DEBUGP(" %s status is still BSY, try again...\n",mstr);
} else {
US_DEBUGP(" %s status !BSY, continue with next operation\n",mstr);
break;
}
}
- /* check for BUSY_STAT and */
- /* WRERR_STAT (workaround ATA Zip drive) and */
- /* ERR_STAT (workaround for Archos CD-ROM) */
+ /* check for ATA_BUSY and */
+ /* ATA_DF (workaround ATA Zip drive) and */
+ /* ATA_ERR (workaround for Archos CD-ROM) */
else if (regs[ATA_REG_STATUS_OFFSET] &
- (BUSY_STAT | WRERR_STAT | ERR_STAT )) {
+ (ATA_BUSY | ATA_DF | ATA_ERR)) {
US_DEBUGP(" Status indicates it is not ready, try again...\n");
}
/* check for DRDY, ATA devices set DRDY after SRST */
- else if (regs[ATA_REG_STATUS_OFFSET] & READY_STAT) {
+ else if (regs[ATA_REG_STATUS_OFFSET] & ATA_DRDY) {
US_DEBUGP(" Identified ATA device\n");
info->DeviceFlags |= DF_ATA_DEVICE;
info->DeviceHead = master_slave;
@@ -1053,103 +1054,50 @@ static int isd200_manual_enum(struct us_data *us)
return(retStatus);
}
-static void isd200_fix_driveid (struct hd_driveid *id)
+static void isd200_fix_driveid(u16 *id)
{
#ifndef __LITTLE_ENDIAN
# ifdef __BIG_ENDIAN
int i;
- u16 *stringcast;
-
- id->config = __le16_to_cpu(id->config);
- id->cyls = __le16_to_cpu(id->cyls);
- id->reserved2 = __le16_to_cpu(id->reserved2);
- id->heads = __le16_to_cpu(id->heads);
- id->track_bytes = __le16_to_cpu(id->track_bytes);
- id->sector_bytes = __le16_to_cpu(id->sector_bytes);
- id->sectors = __le16_to_cpu(id->sectors);
- id->vendor0 = __le16_to_cpu(id->vendor0);
- id->vendor1 = __le16_to_cpu(id->vendor1);
- id->vendor2 = __le16_to_cpu(id->vendor2);
- stringcast = (u16 *)&id->serial_no[0];
- for (i = 0; i < (20/2); i++)
- stringcast[i] = __le16_to_cpu(stringcast[i]);
- id->buf_type = __le16_to_cpu(id->buf_type);
- id->buf_size = __le16_to_cpu(id->buf_size);
- id->ecc_bytes = __le16_to_cpu(id->ecc_bytes);
- stringcast = (u16 *)&id->fw_rev[0];
- for (i = 0; i < (8/2); i++)
- stringcast[i] = __le16_to_cpu(stringcast[i]);
- stringcast = (u16 *)&id->model[0];
- for (i = 0; i < (40/2); i++)
- stringcast[i] = __le16_to_cpu(stringcast[i]);
- id->dword_io = __le16_to_cpu(id->dword_io);
- id->reserved50 = __le16_to_cpu(id->reserved50);
- id->field_valid = __le16_to_cpu(id->field_valid);
- id->cur_cyls = __le16_to_cpu(id->cur_cyls);
- id->cur_heads = __le16_to_cpu(id->cur_heads);
- id->cur_sectors = __le16_to_cpu(id->cur_sectors);
- id->cur_capacity0 = __le16_to_cpu(id->cur_capacity0);
- id->cur_capacity1 = __le16_to_cpu(id->cur_capacity1);
- id->lba_capacity = __le32_to_cpu(id->lba_capacity);
- id->dma_1word = __le16_to_cpu(id->dma_1word);
- id->dma_mword = __le16_to_cpu(id->dma_mword);
- id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes);
- id->eide_dma_min = __le16_to_cpu(id->eide_dma_min);
- id->eide_dma_time = __le16_to_cpu(id->eide_dma_time);
- id->eide_pio = __le16_to_cpu(id->eide_pio);
- id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy);
- for (i = 0; i < 2; ++i)
- id->words69_70[i] = __le16_to_cpu(id->words69_70[i]);
- for (i = 0; i < 4; ++i)
- id->words71_74[i] = __le16_to_cpu(id->words71_74[i]);
- id->queue_depth = __le16_to_cpu(id->queue_depth);
- for (i = 0; i < 4; ++i)
- id->words76_79[i] = __le16_to_cpu(id->words76_79[i]);
- id->major_rev_num = __le16_to_cpu(id->major_rev_num);
- id->minor_rev_num = __le16_to_cpu(id->minor_rev_num);
- id->command_set_1 = __le16_to_cpu(id->command_set_1);
- id->command_set_2 = __le16_to_cpu(id->command_set_2);
- id->cfsse = __le16_to_cpu(id->cfsse);
- id->cfs_enable_1 = __le16_to_cpu(id->cfs_enable_1);
- id->cfs_enable_2 = __le16_to_cpu(id->cfs_enable_2);
- id->csf_default = __le16_to_cpu(id->csf_default);
- id->dma_ultra = __le16_to_cpu(id->dma_ultra);
- id->trseuc = __le16_to_cpu(id->trseuc);
- id->trsEuc = __le16_to_cpu(id->trsEuc);
- id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues);
- id->mprc = __le16_to_cpu(id->mprc);
- id->hw_config = __le16_to_cpu(id->hw_config);
- id->acoustic = __le16_to_cpu(id->acoustic);
- id->msrqs = __le16_to_cpu(id->msrqs);
- id->sxfert = __le16_to_cpu(id->sxfert);
- id->sal = __le16_to_cpu(id->sal);
- id->spg = __le32_to_cpu(id->spg);
- id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2);
- for (i = 0; i < 22; i++)
- id->words104_125[i] = __le16_to_cpu(id->words104_125[i]);
- id->last_lun = __le16_to_cpu(id->last_lun);
- id->word127 = __le16_to_cpu(id->word127);
- id->dlf = __le16_to_cpu(id->dlf);
- id->csfo = __le16_to_cpu(id->csfo);
- for (i = 0; i < 26; i++)
- id->words130_155[i] = __le16_to_cpu(id->words130_155[i]);
- id->word156 = __le16_to_cpu(id->word156);
- for (i = 0; i < 3; i++)
- id->words157_159[i] = __le16_to_cpu(id->words157_159[i]);
- id->cfa_power = __le16_to_cpu(id->cfa_power);
- for (i = 0; i < 14; i++)
- id->words161_175[i] = __le16_to_cpu(id->words161_175[i]);
- for (i = 0; i < 31; i++)
- id->words176_205[i] = __le16_to_cpu(id->words176_205[i]);
- for (i = 0; i < 48; i++)
- id->words206_254[i] = __le16_to_cpu(id->words206_254[i]);
- id->integrity_word = __le16_to_cpu(id->integrity_word);
+
+ for (i = 0; i < ATA_ID_WORDS; i++)
+ id[i] = __le16_to_cpu(id[i]);
# else
# error "Please fix <asm/byteorder.h>"
# endif
#endif
}
+static void isd200_dump_driveid(u16 *id)
+{
+ US_DEBUGP(" Identify Data Structure:\n");
+ US_DEBUGP(" config = 0x%x\n", id[ATA_ID_CONFIG]);
+ US_DEBUGP(" cyls = 0x%x\n", id[ATA_ID_CYLS]);
+ US_DEBUGP(" heads = 0x%x\n", id[ATA_ID_HEADS]);
+ US_DEBUGP(" track_bytes = 0x%x\n", id[4]);
+ US_DEBUGP(" sector_bytes = 0x%x\n", id[5]);
+ US_DEBUGP(" sectors = 0x%x\n", id[ATA_ID_SECTORS]);
+ US_DEBUGP(" serial_no[0] = 0x%x\n", *(char *)&id[ATA_ID_SERNO]);
+ US_DEBUGP(" buf_type = 0x%x\n", id[20]);
+ US_DEBUGP(" buf_size = 0x%x\n", id[ATA_ID_BUF_SIZE]);
+ US_DEBUGP(" ecc_bytes = 0x%x\n", id[22]);
+ US_DEBUGP(" fw_rev[0] = 0x%x\n", *(char *)&id[ATA_ID_FW_REV]);
+ US_DEBUGP(" model[0] = 0x%x\n", *(char *)&id[ATA_ID_PROD]);
+ US_DEBUGP(" max_multsect = 0x%x\n", id[ATA_ID_MAX_MULTSECT] & 0xff);
+ US_DEBUGP(" dword_io = 0x%x\n", id[ATA_ID_DWORD_IO]);
+ US_DEBUGP(" capability = 0x%x\n", id[ATA_ID_CAPABILITY] >> 8);
+ US_DEBUGP(" tPIO = 0x%x\n", id[ATA_ID_OLD_PIO_MODES] >> 8);
+ US_DEBUGP(" tDMA = 0x%x\n", id[ATA_ID_OLD_DMA_MODES] >> 8);
+ US_DEBUGP(" field_valid = 0x%x\n", id[ATA_ID_FIELD_VALID]);
+ US_DEBUGP(" cur_cyls = 0x%x\n", id[ATA_ID_CUR_CYLS]);
+ US_DEBUGP(" cur_heads = 0x%x\n", id[ATA_ID_CUR_HEADS]);
+ US_DEBUGP(" cur_sectors = 0x%x\n", id[ATA_ID_CUR_SECTORS]);
+ US_DEBUGP(" cur_capacity = 0x%x\n", ata_id_u32(id, 57));
+ US_DEBUGP(" multsect = 0x%x\n", id[ATA_ID_MULTSECT] & 0xff);
+ US_DEBUGP(" lba_capacity = 0x%x\n", ata_id_u32(id, ATA_ID_LBA_CAPACITY));
+ US_DEBUGP(" command_set_1 = 0x%x\n", id[ATA_ID_COMMAND_SET_1]);
+ US_DEBUGP(" command_set_2 = 0x%x\n", id[ATA_ID_COMMAND_SET_2]);
+}
/**************************************************************************
* isd200_get_inquiry_data
@@ -1163,7 +1111,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
{
struct isd200_info *info = (struct isd200_info *)us->extra;
int retStatus = ISD200_GOOD;
- struct hd_driveid *id = info->id;
+ u16 *id = info->id;
US_DEBUGP("Entering isd200_get_inquiry_data\n");
@@ -1180,8 +1128,7 @@ static int isd200_get_inquiry_data( struct us_data *us )
/* this must be an ATA device */
/* perform an ATA Command Identify */
transferStatus = isd200_action( us, ACTION_IDENTIFY,
- id,
- sizeof(struct hd_driveid) );
+ id, ATA_ID_WORDS * 2);
if (transferStatus != ISD200_TRANSPORT_GOOD) {
/* Error issuing ATA Command Identify */
US_DEBUGP(" Error issuing ATA Command Identify\n");
@@ -1191,35 +1138,9 @@ static int isd200_get_inquiry_data( struct us_data *us )
int i;
__be16 *src;
__u16 *dest;
- isd200_fix_driveid(id);
- US_DEBUGP(" Identify Data Structure:\n");
- US_DEBUGP(" config = 0x%x\n", id->config);
- US_DEBUGP(" cyls = 0x%x\n", id->cyls);
- US_DEBUGP(" heads = 0x%x\n", id->heads);
- US_DEBUGP(" track_bytes = 0x%x\n", id->track_bytes);
- US_DEBUGP(" sector_bytes = 0x%x\n", id->sector_bytes);
- US_DEBUGP(" sectors = 0x%x\n", id->sectors);
- US_DEBUGP(" serial_no[0] = 0x%x\n", id->serial_no[0]);
- US_DEBUGP(" buf_type = 0x%x\n", id->buf_type);
- US_DEBUGP(" buf_size = 0x%x\n", id->buf_size);
- US_DEBUGP(" ecc_bytes = 0x%x\n", id->ecc_bytes);
- US_DEBUGP(" fw_rev[0] = 0x%x\n", id->fw_rev[0]);
- US_DEBUGP(" model[0] = 0x%x\n", id->model[0]);
- US_DEBUGP(" max_multsect = 0x%x\n", id->max_multsect);
- US_DEBUGP(" dword_io = 0x%x\n", id->dword_io);
- US_DEBUGP(" capability = 0x%x\n", id->capability);
- US_DEBUGP(" tPIO = 0x%x\n", id->tPIO);
- US_DEBUGP(" tDMA = 0x%x\n", id->tDMA);
- US_DEBUGP(" field_valid = 0x%x\n", id->field_valid);
- US_DEBUGP(" cur_cyls = 0x%x\n", id->cur_cyls);
- US_DEBUGP(" cur_heads = 0x%x\n", id->cur_heads);
- US_DEBUGP(" cur_sectors = 0x%x\n", id->cur_sectors);
- US_DEBUGP(" cur_capacity = 0x%x\n", (id->cur_capacity1 << 16) + id->cur_capacity0 );
- US_DEBUGP(" multsect = 0x%x\n", id->multsect);
- US_DEBUGP(" lba_capacity = 0x%x\n", id->lba_capacity);
- US_DEBUGP(" command_set_1 = 0x%x\n", id->command_set_1);
- US_DEBUGP(" command_set_2 = 0x%x\n", id->command_set_2);
+ isd200_fix_driveid(id);
+ isd200_dump_driveid(id);
memset(&info->InquiryData, 0, sizeof(info->InquiryData));
@@ -1229,30 +1150,30 @@ static int isd200_get_inquiry_data( struct us_data *us )
/* The length must be at least 36 (5 + 31) */
info->InquiryData.AdditionalLength = 0x1F;
- if (id->command_set_1 & COMMANDSET_MEDIA_STATUS) {
+ if (id[ATA_ID_COMMAND_SET_1] & COMMANDSET_MEDIA_STATUS) {
/* set the removable bit */
info->InquiryData.DeviceTypeModifier = DEVICE_REMOVABLE;
info->DeviceFlags |= DF_REMOVABLE_MEDIA;
}
/* Fill in vendor identification fields */
- src = (__be16*)id->model;
+ src = (__be16 *)&id[ATA_ID_PROD];
dest = (__u16*)info->InquiryData.VendorId;
for (i=0;i<4;i++)
dest[i] = be16_to_cpu(src[i]);
- src = (__be16*)(id->model+8);
+ src = (__be16 *)&id[ATA_ID_PROD + 8/2];
dest = (__u16*)info->InquiryData.ProductId;
for (i=0;i<8;i++)
dest[i] = be16_to_cpu(src[i]);
- src = (__be16*)id->fw_rev;
+ src = (__be16 *)&id[ATA_ID_FW_REV];
dest = (__u16*)info->InquiryData.ProductRevisionLevel;
for (i=0;i<2;i++)
dest[i] = be16_to_cpu(src[i]);
/* determine if it supports Media Status Notification */
- if (id->command_set_2 & COMMANDSET_MEDIA_STATUS) {
+ if (id[ATA_ID_COMMAND_SET_2] & COMMANDSET_MEDIA_STATUS) {
US_DEBUGP(" Device supports Media Status Notification\n");
/* Indicate that it is enabled, even though it is not
@@ -1301,7 +1222,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
union ata_cdb * ataCdb)
{
struct isd200_info *info = (struct isd200_info *)us->extra;
- struct hd_driveid *id = info->id;
+ u16 *id = info->id;
int sendToTransport = 1;
unsigned char sectnum, head;
unsigned short cylinder;
@@ -1369,13 +1290,12 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
US_DEBUGP(" ATA OUT - SCSIOP_READ_CAPACITY\n");
- if (id->capability & CAPABILITY_LBA ) {
- capacity = id->lba_capacity - 1;
- } else {
- capacity = (id->heads *
- id->cyls *
- id->sectors) - 1;
- }
+ if (ata_id_has_lba(id))
+ capacity = ata_id_u32(id, ATA_ID_LBA_CAPACITY) - 1;
+ else
+ capacity = (id[ATA_ID_HEADS] * id[ATA_ID_CYLS] *
+ id[ATA_ID_SECTORS]) - 1;
+
readCapacityData.LogicalBlockAddress = cpu_to_be32(capacity);
readCapacityData.BytesPerBlock = cpu_to_be32(0x200);
@@ -1392,16 +1312,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
- if (id->capability & CAPABILITY_LBA) {
+ if (ata_id_has_lba(id)) {
sectnum = (unsigned char)(lba);
cylinder = (unsigned short)(lba>>8);
head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
} else {
- sectnum = (unsigned char)((lba % id->sectors) + 1);
- cylinder = (unsigned short)(lba / (id->sectors *
- id->heads));
- head = (unsigned char)((lba / id->sectors) %
- id->heads);
+ sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
+ cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
+ id[ATA_ID_HEADS]));
+ head = (u8)((lba / id[ATA_ID_SECTORS]) %
+ id[ATA_ID_HEADS]);
}
ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1415,7 +1335,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
- ataCdb->write.CommandByte = WIN_READ;
+ ataCdb->write.CommandByte = ATA_CMD_PIO_READ;
break;
case WRITE_10:
@@ -1424,14 +1344,16 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]);
blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8];
- if (id->capability & CAPABILITY_LBA) {
+ if (ata_id_has_lba(id)) {
sectnum = (unsigned char)(lba);
cylinder = (unsigned short)(lba>>8);
head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F);
} else {
- sectnum = (unsigned char)((lba % id->sectors) + 1);
- cylinder = (unsigned short)(lba / (id->sectors * id->heads));
- head = (unsigned char)((lba / id->sectors) % id->heads);
+ sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1);
+ cylinder = (u16)(lba / (id[ATA_ID_SECTORS] *
+ id[ATA_ID_HEADS]));
+ head = (u8)((lba / id[ATA_ID_SECTORS]) %
+ id[ATA_ID_HEADS]);
}
ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand;
ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand;
@@ -1445,7 +1367,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8);
ataCdb->write.CylinderLowByte = (unsigned char)cylinder;
ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD);
- ataCdb->write.CommandByte = WIN_WRITE;
+ ataCdb->write.CommandByte = ATA_CMD_PIO_WRITE;
break;
case ALLOW_MEDIUM_REMOVAL:
@@ -1459,7 +1381,7 @@ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us,
ataCdb->generic.TransferBlockSize = 1;
ataCdb->generic.RegisterSelect = REG_COMMAND;
ataCdb->write.CommandByte = (srb->cmnd[4] & 0x1) ?
- WIN_DOORLOCK : WIN_DOORUNLOCK;
+ ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
isd200_srb_set_bufflen(srb, 0);
} else {
US_DEBUGP(" Not removeable media, just report okay\n");
@@ -1539,8 +1461,7 @@ static int isd200_init_info(struct us_data *us)
if (!info)
retStatus = ISD200_ERROR;
else {
- info->id = (struct hd_driveid *)
- kzalloc(sizeof(struct hd_driveid), GFP_KERNEL);
+ info->id = kzalloc(ATA_ID_WORDS * 2, GFP_KERNEL);
info->RegsBuf = (unsigned char *)
kmalloc(sizeof(info->ATARegs), GFP_KERNEL);
info->srb.sense_buffer =