diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 23 | ||||
-rw-r--r-- | drivers/md/Makefile | 7 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 18 | ||||
-rw-r--r-- | drivers/md/dm-emc.c | 345 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.c | 213 | ||||
-rw-r--r-- | drivers/md/dm-hw-handler.h | 63 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 38 | ||||
-rw-r--r-- | drivers/md/dm-log.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-mpath-hp-sw.c | 247 | ||||
-rw-r--r-- | drivers/md/dm-mpath-rdac.c | 700 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 186 | ||||
-rw-r--r-- | drivers/md/dm-mpath.h | 1 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 163 | ||||
-rw-r--r-- | drivers/md/dm-snap.h | 11 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 42 | ||||
-rw-r--r-- | drivers/md/dm.c | 46 | ||||
-rw-r--r-- | drivers/md/dm.h | 6 | ||||
-rw-r--r-- | drivers/md/linear.c | 10 | ||||
-rw-r--r-- | drivers/md/raid0.c | 10 | ||||
-rw-r--r-- | drivers/md/raid10.c | 15 | ||||
-rw-r--r-- | drivers/md/raid5.c | 10 |
22 files changed, 404 insertions, 1756 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 610af916891..07d92c11b5d 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -252,27 +252,14 @@ config DM_ZERO config DM_MULTIPATH tristate "Multipath target" depends on BLK_DEV_DM + # nasty syntax but means make DM_MULTIPATH independent + # of SCSI_DH if the latter isn't defined but if + # it is, DM_MULTIPATH must depend on it. We get a build + # error if SCSI_DH=m and DM_MULTIPATH=y + depends on SCSI_DH || !SCSI_DH ---help--- Allow volume managers to support multipath hardware. -config DM_MULTIPATH_EMC - tristate "EMC CX/AX multipath support" - depends on DM_MULTIPATH && BLK_DEV_DM - ---help--- - Multipath support for EMC CX/AX series hardware. - -config DM_MULTIPATH_RDAC - tristate "LSI/Engenio RDAC multipath support (EXPERIMENTAL)" - depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL - ---help--- - Multipath support for LSI/Engenio RDAC. - -config DM_MULTIPATH_HP - tristate "HP MSA multipath support (EXPERIMENTAL)" - depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL - ---help--- - Multipath support for HP MSA (Active/Passive) series hardware. - config DM_DELAY tristate "I/O delaying target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 7be09eeea29..f1ef33dfd8c 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -4,11 +4,9 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o dm-kcopyd.o -dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o +dm-multipath-objs := dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-raid1.o -dm-rdac-objs := dm-mpath-rdac.o -dm-hp-sw-objs := dm-mpath-hp-sw.o md-mod-objs := md.o bitmap.o raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \ raid6int1.o raid6int2.o raid6int4.o \ @@ -35,9 +33,6 @@ obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o -obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o -obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o -obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o obj-$(CONFIG_DM_ZERO) += dm-zero.o diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 621a272a2c7..7e65bad522c 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1234,7 +1234,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect case 0: bitmap_file_set_bit(bitmap, offset); bitmap_count_page(bitmap,offset, 1); - blk_plug_device(bitmap->mddev->queue); + blk_plug_device_unlocked(bitmap->mddev->queue); /* fall through */ case 1: *bmc = 2; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ab6a61db63c..13956437bc8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1216,9 +1216,24 @@ error: return -EINVAL; } +static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct crypt_config *cc = ti->private; + struct request_queue *q = bdev_get_queue(cc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cc->dev->bdev; + bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 5, 0}, + .version= {1, 6, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1228,6 +1243,7 @@ static struct target_type crypt_target = { .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, + .merge = crypt_merge, }; static int __init dm_crypt_init(void) diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c deleted file mode 100644 index 3ea5ad4b780..00000000000 --- a/drivers/md/dm-emc.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath support for EMC CLARiiON AX/CX-series hardware. - */ - -#include "dm.h" -#include "dm-hw-handler.h" -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> - -#define DM_MSG_PREFIX "multipath emc" - -struct emc_handler { - spinlock_t lock; - - /* Whether we should send the short trespass command (FC-series) - * or the long version (default for AX/CX CLARiiON arrays). */ - unsigned short_trespass; - /* Whether or not to honor SCSI reservations when initiating a - * switch-over. Default: Don't. */ - unsigned hr; - - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; -}; - -#define TRESPASS_PAGE 0x22 -#define EMC_FAILOVER_TIMEOUT (60 * HZ) - -/* Code borrowed from dm-lsi-rdac by Mike Christie */ - -static inline void free_bio(struct bio *bio) -{ - __free_page(bio->bi_io_vec[0].bv_page); - bio_put(bio); -} - -static void emc_endio(struct bio *bio, int error) -{ - struct dm_path *path = bio->bi_private; - - /* We also need to look at the sense keys here whether or not to - * switch to the next PG etc. - * - * For now simple logic: either it works or it doesn't. - */ - if (error) - dm_pg_init_complete(path, MP_FAIL_PATH); - else - dm_pg_init_complete(path, 0); - - /* request is freed in block layer */ - free_bio(bio); -} - -static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size) -{ - struct bio *bio; - struct page *page; - - bio = bio_alloc(GFP_ATOMIC, 1); - if (!bio) { - DMERR("get_failover_bio: bio_alloc() failed."); - return NULL; - } - - bio->bi_rw |= (1 << BIO_RW); - bio->bi_bdev = path->dev->bdev; - bio->bi_sector = 0; - bio->bi_private = path; - bio->bi_end_io = emc_endio; - - page = alloc_page(GFP_ATOMIC); - if (!page) { - DMERR("get_failover_bio: alloc_page() failed."); - bio_put(bio); - return NULL; - } - - if (bio_add_page(bio, page, data_size, 0) != data_size) { - DMERR("get_failover_bio: bio_add_page() failed."); - __free_page(page); - bio_put(bio); - return NULL; - } - - return bio; -} - -static struct request *get_failover_req(struct emc_handler *h, - struct bio *bio, struct dm_path *path) -{ - struct request *rq; - struct block_device *bdev = bio->bi_bdev; - struct request_queue *q = bdev_get_queue(bdev); - - /* FIXME: Figure out why it fails with GFP_ATOMIC. */ - rq = blk_get_request(q, WRITE, __GFP_WAIT); - if (!rq) { - DMERR("get_failover_req: blk_get_request failed"); - return NULL; - } - - blk_rq_append_bio(q, rq, bio); - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - - rq->timeout = EMC_FAILOVER_TIMEOUT; - rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - - return rq; -} - -static struct request *emc_trespass_get(struct emc_handler *h, - struct dm_path *path) -{ - struct bio *bio; - struct request *rq; - unsigned char *page22; - unsigned char long_trespass_pg[] = { - 0, 0, 0, 0, - TRESPASS_PAGE, /* Page code */ - 0x09, /* Page length - 2 */ - h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */ - 0xff, 0xff, /* Trespass target */ - 0, 0, 0, 0, 0, 0 /* Reserved bytes / unknown */ - }; - unsigned char short_trespass_pg[] = { - 0, 0, 0, 0, - TRESPASS_PAGE, /* Page code */ - 0x02, /* Page length - 2 */ - h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */ - 0xff, /* Trespass target */ - }; - unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) : - sizeof(long_trespass_pg); - - /* get bio backing */ - if (data_size > PAGE_SIZE) - /* this should never happen */ - return NULL; - - bio = get_failover_bio(path, data_size); - if (!bio) { - DMERR("emc_trespass_get: no bio"); - return NULL; - } - - page22 = (unsigned char *)bio_data(bio); - memset(page22, 0, data_size); - - memcpy(page22, h->short_trespass ? - short_trespass_pg : long_trespass_pg, data_size); - - /* get request for block layer packet command */ - rq = get_failover_req(h, bio, path); - if (!rq) { - DMERR("emc_trespass_get: no rq"); - free_bio(bio); - return NULL; - } - - /* Prepare the command. */ - rq->cmd[0] = MODE_SELECT; - rq->cmd[1] = 0x10; - rq->cmd[4] = data_size; - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - return rq; -} - -static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(path->dev->bdev); - - /* - * We can either blindly init the pg (then look at the sense), - * or we can send some commands to get the state here (then - * possibly send the fo cmnd), or we can also have the - * initial state passed into us and then get an update here. - */ - if (!q) { - DMINFO("emc_pg_init: no queue"); - goto fail_path; - } - - /* FIXME: The request should be pre-allocated. */ - rq = emc_trespass_get(hwh->context, path); - if (!rq) { - DMERR("emc_pg_init: no rq"); - goto fail_path; - } - - DMINFO("emc_pg_init: sending switch-over command"); - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); - return; - -fail_path: - dm_pg_init_complete(path, MP_FAIL_PATH); -} - -static struct emc_handler *alloc_emc_handler(void) -{ - struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL); - - if (h) - spin_lock_init(&h->lock); - - return h; -} - -static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct emc_handler *h; - unsigned hr, short_trespass; - - if (argc == 0) { - /* No arguments: use defaults */ - hr = 0; - short_trespass = 0; - } else if (argc != 2) { - DMWARN("incorrect number of arguments"); - return -EINVAL; - } else { - if ((sscanf(argv[0], "%u", &short_trespass) != 1) - || (short_trespass > 1)) { - DMWARN("invalid trespass mode selected"); - return -EINVAL; - } - - if ((sscanf(argv[1], "%u", &hr) != 1) - || (hr > 1)) { - DMWARN("invalid honor reservation flag selected"); - return -EINVAL; - } - } - - h = alloc_emc_handler(); - if (!h) - return -ENOMEM; - - hwh->context = h; - - if ((h->short_trespass = short_trespass)) - DMWARN("short trespass command will be send"); - else - DMWARN("long trespass command will be send"); - - if ((h->hr = hr)) - DMWARN("honor reservation bit will be set"); - else - DMWARN("honor reservation bit will not be set (default)"); - - return 0; -} - -static void emc_destroy(struct hw_handler *hwh) -{ - struct emc_handler *h = (struct emc_handler *) hwh->context; - - kfree(h); - hwh->context = NULL; -} - -static unsigned emc_error(struct hw_handler *hwh, struct bio *bio) -{ - /* FIXME: Patch from axboe still missing */ -#if 0 - int sense; - - if (bio->bi_error & BIO_SENSE) { - sense = bio->bi_error & 0xffffff; /* sense key / asc / ascq */ - - if (sense == 0x020403) { - /* LUN Not Ready - Manual Intervention Required - * indicates this is a passive path. - * - * FIXME: However, if this is seen and EVPD C0 - * indicates that this is due to a NDU in - * progress, we should set FAIL_PATH too. - * This indicates we might have to do a SCSI - * inquiry in the end_io path. Ugh. */ - return MP_BYPASS_PG | MP_RETRY_IO; - } else if (sense == 0x052501) { - /* An array based copy is in progress. Do not - * fail the path, do not bypass to another PG, - * do not retry. Fail the IO immediately. - * (Actually this is the same conclusion as in - * the default handler, but lets make sure.) */ - return 0; - } else if (sense == 0x062900) { - /* Unit Attention Code. This is the first IO - * to the new path, so just retry. */ - return MP_RETRY_IO; - } - } -#endif - - /* Try default handler */ - return dm_scsi_err_handler(hwh, bio); -} - -static struct hw_handler_type emc_hwh = { - .name = "emc", - .module = THIS_MODULE, - .create = emc_create, - .destroy = emc_destroy, - .pg_init = emc_pg_init, - .error = emc_error, -}; - -static int __init dm_emc_init(void) -{ - int r = dm_register_hw_handler(&emc_hwh); - - if (r < 0) - DMERR("register failed %d", r); - - DMINFO("version 0.0.3 loaded"); - - return r; -} - -static void __exit dm_emc_exit(void) -{ - int r = dm_unregister_hw_handler(&emc_hwh); - - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(dm_emc_init); -module_exit(dm_emc_exit); - -MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath"); -MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c deleted file mode 100644 index 2ee84d8aa0b..00000000000 --- a/drivers/md/dm-hw-handler.c +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath hardware handler registration. - */ - -#include "dm.h" -#include "dm-hw-handler.h" - -#include <linux/slab.h> - -struct hwh_internal { - struct hw_handler_type hwht; - - struct list_head list; - long use; -}; - -#define hwht_to_hwhi(__hwht) container_of((__hwht), struct hwh_internal, hwht) - -static LIST_HEAD(_hw_handlers); -static DECLARE_RWSEM(_hwh_lock); - -static struct hwh_internal *__find_hw_handler_type(const char *name) -{ - struct hwh_internal *hwhi; - - list_for_each_entry(hwhi, &_hw_handlers, list) { - if (!strcmp(name, hwhi->hwht.name)) - return hwhi; - } - - return NULL; -} - -static struct hwh_internal *get_hw_handler(const char *name) -{ - struct hwh_internal *hwhi; - - down_read(&_hwh_lock); - hwhi = __find_hw_handler_type(name); - if (hwhi) { - if ((hwhi->use == 0) && !try_module_get(hwhi->hwht.module)) - hwhi = NULL; - else - hwhi->use++; - } - up_read(&_hwh_lock); - - return hwhi; -} - -struct hw_handler_type *dm_get_hw_handler(const char *name) -{ - struct hwh_internal *hwhi; - - if (!name) - return NULL; - - hwhi = get_hw_handler(name); - if (!hwhi) { - request_module("dm-%s", name); - hwhi = get_hw_handler(name); - } - - return hwhi ? &hwhi->hwht : NULL; -} - -void dm_put_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi; - - if (!hwht) - return; - - down_read(&_hwh_lock); - hwhi = __find_hw_handler_type(hwht->name); - if (!hwhi) - goto out; - - if (--hwhi->use == 0) - module_put(hwhi->hwht.module); - - BUG_ON(hwhi->use < 0); - - out: - up_read(&_hwh_lock); -} - -static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL); - - if (hwhi) - hwhi->hwht = *hwht; - - return hwhi; -} - -int dm_register_hw_handler(struct hw_handler_type *hwht) -{ - int r = 0; - struct hwh_internal *hwhi = _alloc_hw_handler(hwht); - - if (!hwhi) - return -ENOMEM; - - down_write(&_hwh_lock); - - if (__find_hw_handler_type(hwht->name)) { - kfree(hwhi); - r = -EEXIST; - } else - list_add(&hwhi->list, &_hw_handlers); - - up_write(&_hwh_lock); - - return r; -} - -int dm_unregister_hw_handler(struct hw_handler_type *hwht) -{ - struct hwh_internal *hwhi; - - down_write(&_hwh_lock); - - hwhi = __find_hw_handler_type(hwht->name); - if (!hwhi) { - up_write(&_hwh_lock); - return -EINVAL; - } - - if (hwhi->use) { - up_write(&_hwh_lock); - return -ETXTBSY; - } - - list_del(&hwhi->list); - - up_write(&_hwh_lock); - - kfree(hwhi); - - return 0; -} - -unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio) -{ -#if 0 - int sense_key, asc, ascq; - - if (bio->bi_error & BIO_SENSE) { - /* FIXME: This is just an initial guess. */ - /* key / asc / ascq */ - sense_key = (bio->bi_error >> 16) & 0xff; - asc = (bio->bi_error >> 8) & 0xff; - ascq = bio->bi_error & 0xff; - - switch (sense_key) { - /* This block as a whole comes from the device. - * So no point retrying on another path. */ - case 0x03: /* Medium error */ - case 0x05: /* Illegal request */ - case 0x07: /* Data protect */ - case 0x08: /* Blank check */ - case 0x0a: /* copy aborted */ - case 0x0c: /* obsolete - no clue ;-) */ - case 0x0d: /* volume overflow */ - case 0x0e: /* data miscompare */ - case 0x0f: /* reserved - no idea either. */ - return MP_ERROR_IO; - - /* For these errors it's unclear whether they - * come from the device or the controller. - * So just lets try a different path, and if - * it eventually succeeds, user-space will clear - * the paths again... */ - case 0x02: /* Not ready */ - case 0x04: /* Hardware error */ - case 0x09: /* vendor specific */ - case 0x0b: /* Aborted command */ - return MP_FAIL_PATH; - - case 0x06: /* Unit attention - might want to decode */ - if (asc == 0x04 && ascq == 0x01) - /* "Unit in the process of - * becoming ready" */ - return 0; - return MP_FAIL_PATH; - - /* FIXME: For Unit Not Ready we may want - * to have a generic pg activation - * feature (START_UNIT). */ - - /* Should these two ever end up in the - * error path? I don't think so. */ - case 0x00: /* No sense */ - case 0x01: /* Recovered error */ - return 0; - } - } -#endif - - /* We got no idea how to decode the other kinds of errors -> - * assume generic error condition. */ - return MP_FAIL_PATH; -} - -EXPORT_SYMBOL_GPL(dm_register_hw_handler); -EXPORT_SYMBOL_GPL(dm_unregister_hw_handler); -EXPORT_SYMBOL_GPL(dm_scsi_err_handler); diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h deleted file mode 100644 index 46809dcb121..00000000000 --- a/drivers/md/dm-hw-handler.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is released under the GPL. - * - * Multipath hardware handler registration. - */ - -#ifndef DM_HW_HANDLER_H -#define DM_HW_HANDLER_H - -#include <linux/device-mapper.h> - -#include "dm-mpath.h" - -struct hw_handler_type; -struct hw_handler { - struct hw_handler_type *type; - struct mapped_device *md; - void *context; -}; - -/* - * Constructs a hardware handler object, takes custom arguments - */ -/* Information about a hardware handler type */ -struct hw_handler_type { - char *name; - struct module *module; - - int (*create) (struct hw_handler *handler, unsigned int argc, - char **argv); - void (*destroy) (struct hw_handler *hwh); - - void (*pg_init) (struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path); - unsigned (*error) (struct hw_handler *hwh, struct bio *bio); - int (*status) (struct hw_handler *hwh, status_type_t type, - char *result, unsigned int maxlen); -}; - -/* Register a hardware handler */ -int dm_register_hw_handler(struct hw_handler_type *type); - -/* Unregister a hardware handler */ -int dm_unregister_hw_handler(struct hw_handler_type *type); - -/* Returns a registered hardware handler type */ -struct hw_handler_type *dm_get_hw_handler(const char *name); - -/* Releases a hardware handler */ -void dm_put_hw_handler(struct hw_handler_type *hwht); - -/* Default err function */ -unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio); - -/* Error flags for err and dm_pg_init_complete */ -#define MP_FAIL_PATH 1 -#define MP_BYPASS_PG 2 -#define MP_ERROR_IO 4 /* Don't retry this I/O */ -#define MP_RETRY 8 - -#endif diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 17753d80ad2..6449bcdf84c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -69,13 +69,25 @@ static void linear_dtr(struct dm_target *ti) kfree(lc); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) { - struct linear_c *lc = (struct linear_c *) ti->private; + struct linear_c *lc = ti->private; + + return lc->start + (bi_sector - ti->begin); +} + +static void linear_map_bio(struct dm_target *ti, struct bio *bio) +{ + struct linear_c *lc = ti->private; bio->bi_bdev = lc->dev->bdev; - bio->bi_sector = lc->start + (bio->bi_sector - ti->begin); + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); +} + +static int linear_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) +{ + linear_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } @@ -114,15 +126,31 @@ static int linear_ioctl(struct dm_target *ti, struct inode *inode, return blkdev_driver_ioctl(bdev->bd_inode, &fake_file, bdev->bd_disk, cmd, arg); } +static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct linear_c *lc = ti->private; + struct request_queue *q = bdev_get_queue(lc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = lc->dev->bdev; + bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type linear_target = { .name = "linear", - .version= {1, 0, 2}, + .version= {1, 0, 3}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, .status = linear_status, .ioctl = linear_ioctl, + .merge = linear_merge, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 67a6f31b7fc..5b48478c79f 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -831,7 +831,7 @@ static struct dm_dirty_log_type _disk_type = { .status = disk_status, }; -int __init dm_dirty_log_init(void) +static int __init dm_dirty_log_init(void) { int r; @@ -848,7 +848,7 @@ int __init dm_dirty_log_init(void) return r; } -void __exit dm_dirty_log_exit(void) +static void __exit dm_dirty_log_exit(void) { dm_dirty_log_type_unregister(&_disk_type); dm_dirty_log_type_unregister(&_core_type); diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c deleted file mode 100644 index b63a0ab37c5..00000000000 --- a/drivers/md/dm-mpath-hp-sw.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (C) 2005 Mike Christie, All rights reserved. - * Copyright (C) 2007 Red Hat, Inc. All rights reserved. - * Authors: Mike Christie - * Dave Wysochanski - * - * This file is released under the GPL. - * - * This module implements the specific path activation code for - * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive) - * storage arrays. - * These storage arrays have controller-based failover, not - * LUN-based failover. However, LUN-based failover is the design - * of dm-multipath. Thus, this module is written for LUN-based failover. - */ -#include <linux/blkdev.h> -#include <linux/list.h> -#include <linux/types.h> -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_dbg.h> - -#include "dm.h" -#include "dm-hw-handler.h" - -#define DM_MSG_PREFIX "multipath hp-sw" -#define DM_HP_HWH_NAME "hp-sw" -#define DM_HP_HWH_VER "1.0.0" - -struct hp_sw_context { - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; -}; - -/* - * hp_sw_error_is_retryable - Is an HP-specific check condition retryable? - * @req: path activation request - * - * Examine error codes of request and determine whether the error is retryable. - * Some error codes are already retried by scsi-ml (see - * scsi_decide_disposition), but some HP specific codes are not. - * The intent of this routine is to supply the logic for the HP specific - * check conditions. - * - * Returns: - * 1 - command completed with retryable error - * 0 - command completed with non-retryable error - * - * Possible optimizations - * 1. More hardware-specific error codes - */ -static int hp_sw_error_is_retryable(struct request *req) -{ - /* - * NOT_READY is known to be retryable - * For now we just dump out the sense data and call it retryable - */ - if (status_byte(req->errors) == CHECK_CONDITION) - __scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len); - - /* - * At this point we don't have complete information about all the error - * codes from this hardware, so we are just conservative and retry - * when in doubt. - */ - return 1; -} - -/* - * hp_sw_end_io - Completion handler for HP path activation. - * @req: path activation request - * @error: scsi-ml error - * - * Check sense data, free request structure, and notify dm that - * pg initialization has completed. - * - * Context: scsi-ml softirq - * - */ -static void hp_sw_end_io(struct request *req, int error) -{ - struct dm_path *path = req->end_io_data; - unsigned err_flags = 0; - - if (!error) { - DMDEBUG("%s path activation command - success", - path->dev->name); - goto out; - } - - if (hp_sw_error_is_retryable(req)) { - DMDEBUG("%s path activation command - retry", - path->dev->name); - err_flags = MP_RETRY; - goto out; - } - - DMWARN("%s path activation fail - error=0x%x", - path->dev->name, error); - err_flags = MP_FAIL_PATH; - -out: - req->end_io_data = NULL; - __blk_put_request(req->q, req); - dm_pg_init_complete(path, err_flags); -} - -/* - * hp_sw_get_request - Allocate an HP specific path activation request - * @path: path on which request will be sent (needed for request queue) - * - * The START command is used for path activation request. - * These arrays are controller-based failover, not LUN based. - * One START command issued to a single path will fail over all - * LUNs for the same controller. - * - * Possible optimizations - * 1. Make timeout configurable - * 2. Preallocate request - */ -static struct request *hp_sw_get_request(struct dm_path *path) -{ - struct request *req; - struct block_device *bdev = path->dev->bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct hp_sw_context *h = path->hwhcontext; - - req = blk_get_request(q, WRITE, GFP_NOIO); - if (!req) - goto out; - - req->timeout = 60 * HZ; - - req->errors = 0; - req->cmd_type = REQ_TYPE_BLOCK_PC; - req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - req->end_io_data = path; - req->sense = h->sense; - memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); - - req->cmd[0] = START_STOP; - req->cmd[4] = 1; - req->cmd_len = COMMAND_SIZE(req->cmd[0]); - -out: - return req; -} - -/* - * hp_sw_pg_init - HP path activation implementation. - * @hwh: hardware handler specific data - * @bypassed: unused; is the path group bypassed? (see dm-mpath.c) - * @path: path to send initialization command - * - * Send an HP-specific path activation command on 'path'. - * Do not try to optimize in any way, just send the activation command. - * More than one path activation command may be sent to the same controller. - * This seems to work fine for basic failover support. - * - * Possible optimizations - * 1. Detect an in-progress activation request and avoid submitting another one - * 2. Model the controller and only send a single activation request at a time - * 3. Determine the state of a path before sending an activation request - * - * Context: kmpathd (see process_queued_ios() in dm-mpath.c) - */ -static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct request *req; - struct hp_sw_context *h; - - path->hwhcontext = hwh->context; - h = hwh->context; - - req = hp_sw_get_request(path); - if (!req) { - DMERR("%s path activation command - allocation fail", - path->dev->name); - goto retry; - } - - DMDEBUG("%s path activation command - sent", path->dev->name); - - blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io); - return; - -retry: - dm_pg_init_complete(path, MP_RETRY); -} - -static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct hp_sw_context *h; - - h = kmalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - hwh->context = h; - - return 0; -} - -static void hp_sw_destroy(struct hw_handler *hwh) -{ - struct hp_sw_context *h = hwh->context; - - kfree(h); -} - -static struct hw_handler_type hp_sw_hwh = { - .name = DM_HP_HWH_NAME, - .module = THIS_MODULE, - .create = hp_sw_create, - .destroy = hp_sw_destroy, - .pg_init = hp_sw_pg_init, -}; - -static int __init hp_sw_init(void) -{ - int r; - - r = dm_register_hw_handler(&hp_sw_hwh); - if (r < 0) - DMERR("register failed %d", r); - else - DMINFO("version " DM_HP_HWH_VER " loaded"); - - return r; -} - -static void __exit hp_sw_exit(void) -{ - int r; - - r = dm_unregister_hw_handler(&hp_sw_hwh); - if (r < 0) - DMERR("unregister failed %d", r); -} - -module_init(hp_sw_init); -module_exit(hp_sw_exit); - -MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support"); -MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(DM_HP_HWH_VER); diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c deleted file mode 100644 index 95e77734880..00000000000 --- a/drivers/md/dm-mpath-rdac.c +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Engenio/LSI RDAC DM HW handler - * - * Copyright (C) 2005 Mike Christie. All rights reserved. - * Copyright (C) Chandra Seetharaman, IBM Corp. 2007 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - */ -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_eh.h> - -#define DM_MSG_PREFIX "multipath rdac" - -#include "dm.h" -#include "dm-hw-handler.h" - -#define RDAC_DM_HWH_NAME "rdac" -#define RDAC_DM_HWH_VER "0.4" - -/* - * LSI mode page stuff - * - * These struct definitions and the forming of the - * mode page were taken from the LSI RDAC 2.4 GPL'd - * driver, and then converted to Linux conventions. - */ -#define RDAC_QUIESCENCE_TIME 20; -/* - * Page Codes - */ -#define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c - -/* - * Controller modes definitions - */ -#define RDAC_MODE_TRANSFER_ALL_LUNS 0x01 -#define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02 - -/* - * RDAC Options field - */ -#define RDAC_FORCED_QUIESENCE 0x02 - -#define RDAC_FAILOVER_TIMEOUT (60 * HZ) - -struct rdac_mode_6_hdr { - u8 data_len; - u8 medium_type; - u8 device_params; - u8 block_desc_len; -}; - -struct rdac_mode_10_hdr { - u16 data_len; - u8 medium_type; - u8 device_params; - u16 reserved; - u16 block_desc_len; -}; - -struct rdac_mode_common { - u8 controller_serial[16]; - u8 alt_controller_serial[16]; - u8 rdac_mode[2]; - u8 alt_rdac_mode[2]; - u8 quiescence_timeout; - u8 rdac_options; -}; - -struct rdac_pg_legacy { - struct rdac_mode_6_hdr hdr; - u8 page_code; - u8 page_len; - struct rdac_mode_common common; -#define MODE6_MAX_LUN 32 - u8 lun_table[MODE6_MAX_LUN]; - u8 reserved2[32]; - u8 reserved3; - u8 reserved4; -}; - -struct rdac_pg_expanded { - struct rdac_mode_10_hdr hdr; - u8 page_code; - u8 subpage_code; - u8 page_len[2]; - struct rdac_mode_common common; - u8 lun_table[256]; - u8 reserved3; - u8 reserved4; -}; - -struct c9_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC9 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "vace" */ - u8 avte_cvp; - u8 path_prio; - u8 reserved2[38]; -}; - -#define SUBSYS_ID_LEN 16 -#define SLOT_ID_LEN 2 - -struct c4_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC4 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "subs" */ - u8 subsys_id[SUBSYS_ID_LEN]; - u8 revision[4]; - u8 slot_id[SLOT_ID_LEN]; - u8 reserved[2]; -}; - -struct rdac_controller { - u8 subsys_id[SUBSYS_ID_LEN]; - u8 slot_id[SLOT_ID_LEN]; - int use_10_ms; - struct kref kref; - struct list_head node; /* list of all controllers */ - spinlock_t lock; - int submitted; - struct list_head cmd_list; /* list of commands to be submitted */ - union { - struct rdac_pg_legacy legacy; - struct rdac_pg_expanded expanded; - } mode_select; -}; -struct c8_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC8 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "edid" */ - u8 reserved2[3]; - u8 vol_uniq_id_len; - u8 vol_uniq_id[16]; - u8 vol_user_label_len; - u8 vol_user_label[60]; - u8 array_uniq_id_len; - u8 array_unique_id[16]; - u8 array_user_label_len; - u8 array_user_label[60]; - u8 lun[8]; -}; - -struct c2_inquiry { - u8 peripheral_info; - u8 page_code; /* 0xC2 */ - u8 reserved1; - u8 page_len; - u8 page_id[4]; /* "swr4" */ - u8 sw_version[3]; - u8 sw_date[3]; - u8 features_enabled; - u8 max_lun_supported; - u8 partitions[239]; /* Total allocation length should be 0xFF */ -}; - -struct rdac_handler { - struct list_head entry; /* list waiting to submit MODE SELECT */ - unsigned timeout; - struct rdac_controller *ctlr; -#define UNINITIALIZED_LUN (1 << 8) - unsigned lun; - unsigned char sense[SCSI_SENSE_BUFFERSIZE]; - struct dm_path *path; - struct work_struct work; -#define SEND_C2_INQUIRY 1 -#define SEND_C4_INQUIRY 2 -#define SEND_C8_INQUIRY 3 -#define SEND_C9_INQUIRY 4 -#define SEND_MODE_SELECT 5 - int cmd_to_send; - union { - struct c2_inquiry c2; - struct c4_inquiry c4; - struct c8_inquiry c8; - struct c9_inquiry c9; - } inq; -}; - -static LIST_HEAD(ctlr_list); -static DEFINE_SPINLOCK(list_lock); -static struct workqueue_struct *rdac_wkqd; - -static inline int had_failures(struct request *req, int error) -{ - return (error || host_byte(req->errors) != DID_OK || - msg_byte(req->errors) != COMMAND_COMPLETE); -} - -static void rdac_resubmit_all(struct rdac_handler *h) -{ - struct rdac_controller *ctlr = h->ctlr; - struct rdac_handler *tmp, *h1; - - spin_lock(&ctlr->lock); - list_for_each_entry_safe(h1, tmp, &ctlr->cmd_list, entry) { - h1->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h1->work); - list_del(&h1->entry); - } - ctlr->submitted = 0; - spin_unlock(&ctlr->lock); -} - -static void mode_select_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct scsi_sense_hdr sense_hdr; - int sense = 0, fail = 0; - - if (had_failures(req, error)) { - fail = 1; - goto failed; - } - - if (status_byte(req->errors) == CHECK_CONDITION) { - scsi_normalize_sense(req->sense, SCSI_SENSE_BUFFERSIZE, - &sense_hdr); - sense = (sense_hdr.sense_key << 16) | (sense_hdr.asc << 8) | - sense_hdr.ascq; - /* If it is retryable failure, submit the c9 inquiry again */ - if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02 || - sense == 0x62900) { - /* 0x59136 - Command lock contention - * 0x[6b]8b02 - Quiesense in progress or achieved - * 0x62900 - Power On, Reset, or Bus Device Reset - */ - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); - goto done; - } - if (sense) - DMINFO("MODE_SELECT failed on %s with sense 0x%x", - h->path->dev->name, sense); - } -failed: - if (fail || sense) - dm_pg_init_complete(h->path, MP_FAIL_PATH); - else - dm_pg_init_complete(h->path, 0); - -done: - rdac_resubmit_all(h); - __blk_put_request(req->q, req); -} - -static struct request *get_rdac_req(struct rdac_handler *h, - void *buffer, unsigned buflen, int rw) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - rq = blk_get_request(q, rw, GFP_KERNEL); - - if (!rq) { - DMINFO("get_rdac_req: blk_get_request failed"); - return NULL; - } - - if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_KERNEL)) { - blk_put_request(rq); - DMINFO("get_rdac_req: blk_rq_map_kern failed"); - return NULL; - } - - rq->sense = h->sense; - memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); - rq->sense_len = 0; - - rq->end_io_data = h; - rq->timeout = h->timeout; - rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; - return rq; -} - -static struct request *rdac_failover_get(struct rdac_handler *h) -{ - struct request *rq; - struct rdac_mode_common *common; - unsigned data_size; - - if (h->ctlr->use_10_ms) { - struct rdac_pg_expanded *rdac_pg; - - data_size = sizeof(struct rdac_pg_expanded); - rdac_pg = &h->ctlr->mode_select.expanded; - memset(rdac_pg, 0, data_size); - common = &rdac_pg->common; - rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; - rdac_pg->subpage_code = 0x1; - rdac_pg->page_len[0] = 0x01; - rdac_pg->page_len[1] = 0x28; - rdac_pg->lun_table[h->lun] = 0x81; - } else { - struct rdac_pg_legacy *rdac_pg; - - data_size = sizeof(struct rdac_pg_legacy); - rdac_pg = &h->ctlr->mode_select.legacy; - memset(rdac_pg, 0, data_size); - common = &rdac_pg->common; - rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; - rdac_pg->page_len = 0x68; - rdac_pg->lun_table[h->lun] = 0x81; - } - common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS; - common->quiescence_timeout = RDAC_QUIESCENCE_TIME; - common->rdac_options = RDAC_FORCED_QUIESENCE; - - /* get request for block layer packet command */ - rq = get_rdac_req(h, &h->ctlr->mode_select, data_size, WRITE); - if (!rq) { - DMERR("rdac_failover_get: no rq"); - return NULL; - } - - /* Prepare the command. */ - if (h->ctlr->use_10_ms) { - rq->cmd[0] = MODE_SELECT_10; - rq->cmd[7] = data_size >> 8; - rq->cmd[8] = data_size & 0xff; - } else { - rq->cmd[0] = MODE_SELECT; - rq->cmd[4] = data_size; - } - rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); - - return rq; -} - -/* Acquires h->ctlr->lock */ -static void submit_mode_select(struct rdac_handler *h) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - spin_lock(&h->ctlr->lock); - if (h->ctlr->submitted) { - list_add(&h->entry, &h->ctlr->cmd_list); - goto drop_lock; - } - - if (!q) { - DMINFO("submit_mode_select: no queue"); - goto fail_path; - } - - rq = rdac_failover_get(h); - if (!rq) { - DMERR("submit_mode_select: no rq"); - goto fail_path; - } - - DMINFO("queueing MODE_SELECT command on %s", h->path->dev->name); - - blk_execute_rq_nowait(q, NULL, rq, 1, mode_select_endio); - h->ctlr->submitted = 1; - goto drop_lock; -fail_path: - dm_pg_init_complete(h->path, MP_FAIL_PATH); -drop_lock: - spin_unlock(&h->ctlr->lock); -} - -static void release_ctlr(struct kref *kref) -{ - struct rdac_controller *ctlr; - ctlr = container_of(kref, struct rdac_controller, kref); - - spin_lock(&list_lock); - list_del(&ctlr->node); - spin_unlock(&list_lock); - kfree(ctlr); -} - -static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id) -{ - struct rdac_controller *ctlr, *tmp; - - spin_lock(&list_lock); - - list_for_each_entry(tmp, &ctlr_list, node) { - if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) && - (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) { - kref_get(&tmp->kref); - spin_unlock(&list_lock); - return tmp; - } - } - ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC); - if (!ctlr) - goto done; - - /* initialize fields of controller */ - memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN); - memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN); - kref_init(&ctlr->kref); - spin_lock_init(&ctlr->lock); - ctlr->submitted = 0; - ctlr->use_10_ms = -1; - INIT_LIST_HEAD(&ctlr->cmd_list); - list_add(&ctlr->node, &ctlr_list); -done: - spin_unlock(&list_lock); - return ctlr; -} - -static void c4_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c4_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - sp = &h->inq.c4; - - h->ctlr = get_controller(sp->subsys_id, sp->slot_id); - - if (h->ctlr) { - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); - } else - dm_pg_init_complete(h->path, MP_FAIL_PATH); -done: - __blk_put_request(req->q, req); -} - -static void c2_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c2_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - sp = &h->inq.c2; - - /* If more than MODE6_MAX_LUN luns are supported, use mode select 10 */ - if (sp->max_lun_supported >= MODE6_MAX_LUN) - h->ctlr->use_10_ms = 1; - else - h->ctlr->use_10_ms = 0; - - h->cmd_to_send = SEND_MODE_SELECT; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void c9_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c9_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - /* We need to look at the sense keys here to take clear action. - * For now simple logic: If the host is in AVT mode or if controller - * owns the lun, return dm_pg_init_complete(), otherwise submit - * MODE SELECT. - */ - sp = &h->inq.c9; - - /* If in AVT mode, return success */ - if ((sp->avte_cvp >> 7) == 0x1) { - dm_pg_init_complete(h->path, 0); - goto done; - } - - /* If the controller on this path owns the LUN, return success */ - if (sp->avte_cvp & 0x1) { - dm_pg_init_complete(h->path, 0); - goto done; - } - - if (h->ctlr) { - if (h->ctlr->use_10_ms == -1) - h->cmd_to_send = SEND_C2_INQUIRY; - else - h->cmd_to_send = SEND_MODE_SELECT; - } else - h->cmd_to_send = SEND_C4_INQUIRY; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void c8_endio(struct request *req, int error) -{ - struct rdac_handler *h = req->end_io_data; - struct c8_inquiry *sp; - - if (had_failures(req, error)) { - dm_pg_init_complete(h->path, MP_FAIL_PATH); - goto done; - } - - /* We need to look at the sense keys here to take clear action. - * For now simple logic: Get the lun from the inquiry page. - */ - sp = &h->inq.c8; - h->lun = sp->lun[7]; /* currently it uses only one byte */ - h->cmd_to_send = SEND_C9_INQUIRY; - queue_work(rdac_wkqd, &h->work); -done: - __blk_put_request(req->q, req); -} - -static void submit_inquiry(struct rdac_handler *h, int page_code, - unsigned int len, rq_end_io_fn endio) -{ - struct request *rq; - struct request_queue *q = bdev_get_queue(h->path->dev->bdev); - - if (!q) - goto fail_path; - - rq = get_rdac_req(h, &h->inq, len, READ); - if (!rq) - goto fail_path; - - /* Prepare the command. */ - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = page_code; - rq->cmd[4] = len; - rq->cmd_len = COMMAND_SIZE(INQUIRY); - blk_execute_rq_nowait(q, NULL, rq, 1, endio); - return; - -fail_path: - dm_pg_init_complete(h->path, MP_FAIL_PATH); -} - -static void service_wkq(struct work_struct *work) -{ - struct rdac_handler *h = container_of(work, struct rdac_handler, work); - - switch (h->cmd_to_send) { - case SEND_C2_INQUIRY: - submit_inquiry(h, 0xC2, sizeof(struct c2_inquiry), c2_endio); - break; - case SEND_C4_INQUIRY: - submit_inquiry(h, 0xC4, sizeof(struct c4_inquiry), c4_endio); - break; - case SEND_C8_INQUIRY: - submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); - break; - case SEND_C9_INQUIRY: - submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); - break; - case SEND_MODE_SELECT: - submit_mode_select(h); - break; - default: - BUG(); - } -} -/* - * only support subpage2c until we confirm that this is just a matter of - * of updating firmware or not, and RDAC (basic AVT works already) for now - * but we can add these in in when we get time and testers - */ -static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv) -{ - struct rdac_handler *h; - unsigned timeout; - - if (argc == 0) { - /* No arguments: use defaults */ - timeout = RDAC_FAILOVER_TIMEOUT; - } else if (argc != 1) { - DMWARN("incorrect number of arguments"); - return -EINVAL; - } else { - if (sscanf(argv[1], "%u", &timeout) != 1) { - DMWARN("invalid timeout value"); - return -EINVAL; - } - } - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return -ENOMEM; - - hwh->context = h; - h->timeout = timeout; - h->lun = UNINITIALIZED_LUN; - INIT_WORK(&h->work, service_wkq); - DMWARN("using RDAC command with timeout %u", h->timeout); - - return 0; -} - -static void rdac_destroy(struct hw_handler *hwh) -{ - struct rdac_handler *h = hwh->context; - - if (h->ctlr) - kref_put(&h->ctlr->kref, release_ctlr); - kfree(h); - hwh->context = NULL; -} - -static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio) -{ - /* Try default handler */ - return dm_scsi_err_handler(hwh, bio); -} - -static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed, - struct dm_path *path) -{ - struct rdac_handler *h = hwh->context; - - h->path = path; - switch (h->lun) { - case UNINITIALIZED_LUN: - submit_inquiry(h, 0xC8, sizeof(struct c8_inquiry), c8_endio); - break; - default: - submit_inquiry(h, 0xC9, sizeof(struct c9_inquiry), c9_endio); - } -} - -static struct hw_handler_type rdac_handler = { - .name = RDAC_DM_HWH_NAME, - .module = THIS_MODULE, - .create = rdac_create, - .destroy = rdac_destroy, - .pg_init = rdac_pg_init, - .error = rdac_error, -}; - -static int __init rdac_init(void) -{ - int r; - - rdac_wkqd = create_singlethread_workqueue("rdac_wkqd"); - if (!rdac_wkqd) { - DMERR("Failed to create workqueue rdac_wkqd."); - return -ENOMEM; - } - - r = dm_register_hw_handler(&rdac_handler); - if (r < 0) { - DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); - destroy_workqueue(rdac_wkqd); - return r; - } - - DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); - return 0; -} - -static void __exit rdac_exit(void) -{ - int r = dm_unregister_hw_handler(&rdac_handler); - - destroy_workqueue(rdac_wkqd); - if (r < 0) - DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r); -} - -module_init(rdac_init); -module_exit(rdac_exit); - -MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support"); -MODULE_AUTHOR("Mike Christie, Chandra Seetharaman"); -MODULE_LICENSE("GPL"); -MODULE_VERSION(RDAC_DM_HWH_VER); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e7ee59e655d..71dd65aa31b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -7,7 +7,6 @@ #include "dm.h" #include "dm-path-selector.h" -#include "dm-hw-handler.h" #include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" @@ -20,6 +19,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/workqueue.h> +#include <scsi/scsi_dh.h> #include <asm/atomic.h> #define DM_MSG_PREFIX "multipath" @@ -61,7 +61,8 @@ struct multipath { spinlock_t lock; - struct hw_handler hw_handler; + const char *hw_handler_name; + struct work_struct activate_path; unsigned nr_priority_groups; struct list_head priority_groups; unsigned pg_init_required; /* pg_init needs calling? */ @@ -106,9 +107,10 @@ typedef int (*action_fn) (struct pgpath *pgpath); static struct kmem_cache *_mpio_cache; -static struct workqueue_struct *kmultipathd; +static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); +static void activate_path(struct work_struct *work); /*----------------------------------------------- @@ -145,9 +147,12 @@ static struct priority_group *alloc_priority_group(void) static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) { struct pgpath *pgpath, *tmp; + struct multipath *m = ti->private; list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { list_del(&pgpath->list); + if (m->hw_handler_name) + scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); dm_put_device(ti, pgpath->path.dev); free_pgpath(pgpath); } @@ -178,6 +183,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios); INIT_WORK(&m->trigger_event, trigger_event); + INIT_WORK(&m->activate_path, activate_path); m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { kfree(m); @@ -193,18 +199,13 @@ static struct multipath *alloc_multipath(struct dm_target *ti) static void free_multipath(struct multipath *m) { struct priority_group *pg, *tmp; - struct hw_handler *hwh = &m->hw_handler; list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { list_del(&pg->list); free_priority_group(pg, m->ti); } - if (hwh->type) { - hwh->type->destroy(hwh); - dm_put_hw_handler(hwh->type); - } - + kfree(m->hw_handler_name); mempool_destroy(m->mpio_pool); kfree(m); } @@ -216,12 +217,10 @@ static void free_multipath(struct multipath *m) static void __switch_pg(struct multipath *m, struct pgpath *pgpath) { - struct hw_handler *hwh = &m->hw_handler; - m->current_pg = pgpath->pg; /* Must we initialise the PG first, and queue I/O till it's ready? */ - if (hwh->type && hwh->type->pg_init) { + if (m->hw_handler_name) { m->pg_init_required = 1; m->queue_io = 1; } else { @@ -409,7 +408,6 @@ static void process_queued_ios(struct work_struct *work) { struct multipath *m = container_of(work, struct multipath, process_queued_ios); - struct hw_handler *hwh = &m->hw_handler; struct pgpath *pgpath = NULL; unsigned init_required = 0, must_queue = 1; unsigned long flags; @@ -439,7 +437,7 @@ out: spin_unlock_irqrestore(&m->lock, flags); if (init_required) - hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path); + queue_work(kmpath_handlerd, &m->activate_path); if (!must_queue) dispatch_queued_ios(m); @@ -530,8 +528,10 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg, } r = read_param(_params, shift(as), &ps_argc, &ti->error); - if (r) + if (r) { + dm_put_path_selector(pst); return -EINVAL; + } r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { @@ -551,6 +551,7 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, { int r; struct pgpath *p; + struct multipath *m = ti->private; /* we need at least a path arg */ if (as->argc < 1) { @@ -569,6 +570,15 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, goto bad; } + if (m->hw_handler_name) { + r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev), + m->hw_handler_name); + if (r < 0) { + dm_put_device(ti, p->path.dev); + goto bad; + } + } + r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); if (r) { dm_put_device(ti, p->path.dev); @@ -628,8 +638,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as, struct pgpath *pgpath; struct arg_set path_args; - if (as->argc < nr_params) + if (as->argc < nr_params) { + ti->error = "not enough path parameters"; goto bad; + } path_args.argc = nr_params; path_args.argv = as->argv; @@ -652,8 +664,6 @@ static struct priority_group *parse_priority_group(struct arg_set *as, static int parse_hw_handler(struct arg_set *as, struct multipath *m) { - int r; - struct hw_handler_type *hwht; unsigned hw_argc; struct dm_target *ti = m->ti; @@ -661,30 +671,20 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m) {0, 1024, "invalid number of hardware handler args"}, }; - r = read_param(_params, shift(as), &hw_argc, &ti->error); - if (r) + if (read_param(_params, shift(as), &hw_argc, &ti->error)) return -EINVAL; if (!hw_argc) return 0; - hwht = dm_get_hw_handler(shift(as)); - if (!hwht) { + m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL); + request_module("scsi_dh_%s", m->hw_handler_name); + if (scsi_dh_handler_exist(m->hw_handler_name) == 0) { ti->error = "unknown hardware handler type"; + kfree(m->hw_handler_name); + m->hw_handler_name = NULL; return -EINVAL; } - - m->hw_handler.md = dm_table_get_md(ti->table); - dm_put(m->hw_handler.md); - - r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv); - if (r) { - dm_put_hw_handler(hwht); - ti->error = "hardware handler constructor failed"; - return r; - } - - m->hw_handler.type = hwht; consume(as, hw_argc - 1); return 0; @@ -808,6 +808,7 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = (struct multipath *) ti->private; + flush_workqueue(kmpath_handlerd); flush_workqueue(kmultipathd); free_multipath(m); } @@ -883,7 +884,7 @@ static int reinstate_path(struct pgpath *pgpath) if (pgpath->path.is_active) goto out; - if (!pgpath->pg->ps.type) { + if (!pgpath->pg->ps.type->reinstate_path) { DMWARN("Reinstate path not supported by path selector %s", pgpath->pg->ps.type->name); r = -EINVAL; @@ -1025,52 +1026,85 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) return limit_reached; } -/* - * pg_init must call this when it has completed its initialisation - */ -void dm_pg_init_complete(struct dm_path *path, unsigned err_flags) +static void pg_init_done(struct dm_path *path, int errors) { struct pgpath *pgpath = path_to_pgpath(path); struct priority_group *pg = pgpath->pg; struct multipath *m = pg->m; unsigned long flags; - /* - * If requested, retry pg_init until maximum number of retries exceeded. - * If retry not requested and PG already bypassed, always fail the path. - */ - if (err_flags & MP_RETRY) { - if (pg_init_limit_reached(m, pgpath)) - err_flags |= MP_FAIL_PATH; - } else if (err_flags && pg->bypassed) - err_flags |= MP_FAIL_PATH; - - if (err_flags & MP_FAIL_PATH) + /* device or driver problems */ + switch (errors) { + case SCSI_DH_OK: + break; + case SCSI_DH_NOSYS: + if (!m->hw_handler_name) { + errors = 0; + break; + } + DMERR("Cannot failover device because scsi_dh_%s was not " + "loaded.", m->hw_handler_name); + /* + * Fail path for now, so we do not ping pong + */ fail_path(pgpath); - - if (err_flags & MP_BYPASS_PG) + break; + case SCSI_DH_DEV_TEMP_BUSY: + /* + * Probably doing something like FW upgrade on the + * controller so try the other pg. + */ bypass_pg(m, pg, 1); + break; + /* TODO: For SCSI_DH_RETRY we should wait a couple seconds */ + case SCSI_DH_RETRY: + case SCSI_DH_IMM_RETRY: + case SCSI_DH_RES_TEMP_UNAVAIL: + if (pg_init_limit_reached(m, pgpath)) + fail_path(pgpath); + errors = 0; + break; + default: + /* + * We probably do not want to fail the path for a device + * error, but this is what the old dm did. In future + * patches we can do more advanced handling. + */ + fail_path(pgpath); + } spin_lock_irqsave(&m->lock, flags); - if (err_flags & ~MP_RETRY) { + if (errors) { + DMERR("Could not failover device. Error %d.", errors); m->current_pgpath = NULL; m->current_pg = NULL; - } else if (!m->pg_init_required) + } else if (!m->pg_init_required) { m->queue_io = 0; + pg->bypassed = 0; + } m->pg_init_in_progress = 0; queue_work(kmultipathd, &m->process_queued_ios); spin_unlock_irqrestore(&m->lock, flags); } +static void activate_path(struct work_struct *work) +{ + int ret; + struct multipath *m = + container_of(work, struct multipath, activate_path); + struct dm_path *path = &m->current_pgpath->path; + + ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev)); + pg_init_done(path, ret); +} + /* * end_io handling */ static int do_end_io(struct multipath *m, struct bio *bio, int error, struct dm_mpath_io *mpio) { - struct hw_handler *hwh = &m->hw_handler; - unsigned err_flags = MP_FAIL_PATH; /* Default behavior */ unsigned long flags; if (!error) @@ -1097,19 +1131,8 @@ static int do_end_io(struct multipath *m, struct bio *bio, } spin_unlock_irqrestore(&m->lock, flags); - if (hwh->type && hwh->type->error) - err_flags = hwh->type->error(hwh, bio); - - if (mpio->pgpath) { - if (err_flags & MP_FAIL_PATH) - fail_path(mpio->pgpath); - - if (err_flags & MP_BYPASS_PG) - bypass_pg(m, mpio->pgpath->pg, 1); - } - - if (err_flags & MP_ERROR_IO) - return -EIO; + if (mpio->pgpath) + fail_path(mpio->pgpath); requeue: dm_bio_restore(&mpio->details, bio); @@ -1194,7 +1217,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type, int sz = 0; unsigned long flags; struct multipath *m = (struct multipath *) ti->private; - struct hw_handler *hwh = &m->hw_handler; struct priority_group *pg; struct pgpath *p; unsigned pg_num; @@ -1214,12 +1236,10 @@ static int multipath_status(struct dm_target *ti, status_type_t type, DMEMIT("pg_init_retries %u ", m->pg_init_retries); } - if (hwh->type && hwh->type->status) - sz += hwh->type->status(hwh, type, result + sz, maxlen - sz); - else if (!hwh->type || type == STATUSTYPE_INFO) + if (!m->hw_handler_name || type == STATUSTYPE_INFO) DMEMIT("0 "); else - DMEMIT("1 %s ", hwh->type->name); + DMEMIT("1 %s ", m->hw_handler_name); DMEMIT("%u ", m->nr_priority_groups); @@ -1422,6 +1442,21 @@ static int __init dm_multipath_init(void) return -ENOMEM; } + /* + * A separate workqueue is used to handle the device handlers + * to avoid overloading existing workqueue. Overloading the + * old workqueue would also create a bottleneck in the + * path of the storage hardware device activation. + */ + kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd"); + if (!kmpath_handlerd) { + DMERR("failed to create workqueue kmpath_handlerd"); + destroy_workqueue(kmultipathd); + dm_unregister_target(&multipath_target); + kmem_cache_destroy(_mpio_cache); + return -ENOMEM; + } + DMINFO("version %u.%u.%u loaded", multipath_target.version[0], multipath_target.version[1], multipath_target.version[2]); @@ -1433,6 +1468,7 @@ static void __exit dm_multipath_exit(void) { int r; + destroy_workqueue(kmpath_handlerd); destroy_workqueue(kmultipathd); r = dm_unregister_target(&multipath_target); @@ -1441,8 +1477,6 @@ static void __exit dm_multipath_exit(void) kmem_cache_destroy(_mpio_cache); } -EXPORT_SYMBOL_GPL(dm_pg_init_complete); - module_init(dm_multipath_init); module_exit(dm_multipath_exit); diff --git a/drivers/md/dm-mpath.h b/drivers/md/dm-mpath.h index b9cdcbb3ed5..c198b856a45 100644 --- a/drivers/md/dm-mpath.h +++ b/drivers/md/dm-mpath.h @@ -16,7 +16,6 @@ struct dm_path { unsigned is_active; /* Read-only */ void *pscontext; /* For path-selector use */ - void *hwhcontext; /* For hw-handler use */ }; /* Callback for hwh_pg_init_fn to use when complete */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba8a47d61b..6e5528aecc9 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,6 +40,11 @@ */ #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) +/* + * The size of the mempool used to track chunks in use. + */ +#define MIN_IOS 256 + static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); @@ -91,7 +96,63 @@ struct dm_snap_pending_exception { */ static struct kmem_cache *exception_cache; static struct kmem_cache *pending_cache; -static mempool_t *pending_pool; + +struct dm_snap_tracked_chunk { + struct hlist_node node; + chunk_t chunk; +}; + +static struct kmem_cache *tracked_chunk_cache; + +static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, + chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, + GFP_NOIO); + unsigned long flags; + + c->chunk = chunk; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_add_head(&c->node, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + return c; +} + +static void stop_tracking_chunk(struct dm_snapshot *s, + struct dm_snap_tracked_chunk *c) +{ + unsigned long flags; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_del(&c->node); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + mempool_free(c, s->tracked_chunk_pool); +} + +static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c; + struct hlist_node *hn; + int found = 0; + + spin_lock_irq(&s->tracked_chunk_lock); + + hlist_for_each_entry(c, hn, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { + if (c->chunk == chunk) { + found = 1; + break; + } + } + + spin_unlock_irq(&s->tracked_chunk_lock); + + return found; +} /* * One of these per registered origin, held in the snapshot_origins hash @@ -302,14 +363,19 @@ static void free_exception(struct dm_snap_exception *e) kmem_cache_free(exception_cache, e); } -static struct dm_snap_pending_exception *alloc_pending_exception(void) +static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) { - return mempool_alloc(pending_pool, GFP_NOIO); + struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, + GFP_NOIO); + + pe->snap = s; + + return pe; } static void free_pending_exception(struct dm_snap_pending_exception *pe) { - mempool_free(pe, pending_pool); + mempool_free(pe, pe->snap->pending_pool); } static void insert_completed_exception(struct dm_snapshot *s, @@ -482,6 +548,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + int i; int r = -EINVAL; char persistent; char *origin_path; @@ -564,11 +631,30 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad5; } + s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); + if (!s->pending_pool) { + ti->error = "Could not allocate mempool for pending exceptions"; + goto bad6; + } + + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, + tracked_chunk_cache); + if (!s->tracked_chunk_pool) { + ti->error = "Could not allocate tracked_chunk mempool for " + "tracking reads"; + goto bad_tracked_chunk_pool; + } + + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); + + spin_lock_init(&s->tracked_chunk_lock); + /* Metadata must only be loaded into one table at once */ r = s->store.read_metadata(&s->store); if (r < 0) { ti->error = "Failed to read snapshot metadata"; - goto bad6; + goto bad_load_and_register; } else if (r > 0) { s->valid = 0; DMWARN("Snapshot is marked invalid."); @@ -582,7 +668,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (register_snapshot(s)) { r = -EINVAL; ti->error = "Cannot register snapshot origin"; - goto bad6; + goto bad_load_and_register; } ti->private = s; @@ -590,6 +676,12 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return 0; + bad_load_and_register: + mempool_destroy(s->tracked_chunk_pool); + + bad_tracked_chunk_pool: + mempool_destroy(s->pending_pool); + bad6: dm_kcopyd_client_destroy(s->kcopyd_client); @@ -624,6 +716,9 @@ static void __free_exceptions(struct dm_snapshot *s) static void snapshot_dtr(struct dm_target *ti) { +#ifdef CONFIG_DM_DEBUG + int i; +#endif struct dm_snapshot *s = ti->private; flush_workqueue(ksnapd); @@ -632,8 +727,17 @@ static void snapshot_dtr(struct dm_target *ti) /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); +#ifdef CONFIG_DM_DEBUG + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); +#endif + + mempool_destroy(s->tracked_chunk_pool); + __free_exceptions(s); + mempool_destroy(s->pending_pool); + dm_put_device(ti, s->origin); dm_put_device(ti, s->cow); @@ -772,6 +876,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) } /* + * Check for conflicting reads. This is extremely improbable, + * so yield() is sufficient and there is no need for a wait queue. + */ + while (__chunk_is_tracked(s, pe->e.old_chunk)) + yield(); + + /* * Add a proper exception, and remove the * in-flight exception from the list. */ @@ -873,7 +984,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) * to hold the lock while we do this. */ up_write(&s->lock); - pe = alloc_pending_exception(); + pe = alloc_pending_exception(s); down_write(&s->lock); if (!s->valid) { @@ -893,7 +1004,6 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) bio_list_init(&pe->snapshot_bios); pe->primary_pe = NULL; atomic_set(&pe->ref_count, 0); - pe->snap = s; pe->started = 0; if (s->store.prepare_exception(&s->store, &pe->e)) { @@ -974,14 +1084,10 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, start_copy(pe); goto out; } - } else - /* - * FIXME: this read path scares me because we - * always use the origin when we have a pending - * exception. However I can't think of a - * situation where this is wrong - ejt. - */ + } else { bio->bi_bdev = s->origin->bdev; + map_context->ptr = track_chunk(s, chunk); + } out_unlock: up_write(&s->lock); @@ -989,6 +1095,18 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, return r; } +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct dm_snapshot *s = ti->private; + struct dm_snap_tracked_chunk *c = map_context->ptr; + + if (c) + stop_tracking_chunk(s, c); + + return 0; +} + static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; @@ -1266,6 +1384,7 @@ static struct target_type snapshot_target = { .ctr = snapshot_ctr, .dtr = snapshot_dtr, .map = snapshot_map, + .end_io = snapshot_end_io, .resume = snapshot_resume, .status = snapshot_status, }; @@ -1306,9 +1425,9 @@ static int __init dm_snapshot_init(void) goto bad4; } - pending_pool = mempool_create_slab_pool(128, pending_cache); - if (!pending_pool) { - DMERR("Couldn't create pending pool."); + tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); + if (!tracked_chunk_cache) { + DMERR("Couldn't create cache to track chunks in use."); r = -ENOMEM; goto bad5; } @@ -1317,13 +1436,13 @@ static int __init dm_snapshot_init(void) if (!ksnapd) { DMERR("Failed to create ksnapd workqueue."); r = -ENOMEM; - goto bad6; + goto bad_pending_pool; } return 0; - bad6: - mempool_destroy(pending_pool); + bad_pending_pool: + kmem_cache_destroy(tracked_chunk_cache); bad5: kmem_cache_destroy(pending_cache); bad4: @@ -1352,9 +1471,9 @@ static void __exit dm_snapshot_exit(void) DMERR("origin unregister failed %d", r); exit_origin_hash(); - mempool_destroy(pending_pool); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); + kmem_cache_destroy(tracked_chunk_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 24f9fb73b98..292c15609ae 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -130,6 +130,10 @@ struct exception_store { void *context; }; +#define DM_TRACKED_CHUNK_HASH_SIZE 16 +#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + struct dm_snapshot { struct rw_semaphore lock; struct dm_target *ti; @@ -157,6 +161,8 @@ struct dm_snapshot { /* The last percentage we notified */ int last_percent; + mempool_t *pending_pool; + struct exception_table pending; struct exception_table complete; @@ -174,6 +180,11 @@ struct dm_snapshot { /* Queue of snapshot writes for ksnapd to flush */ struct bio_list queued_bios; struct work_struct queued_bios_work; + + /* Chunks with outstanding reads */ + mempool_t *tracked_chunk_pool; + spinlock_t tracked_chunk_lock; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; }; /* diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 94116eaf470..61f44140923 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -316,29 +316,12 @@ static inline int check_space(struct dm_table *t) */ static int lookup_device(const char *path, dev_t *dev) { - int r; - struct nameidata nd; - struct inode *inode; - - if ((r = path_lookup(path, LOOKUP_FOLLOW, &nd))) - return r; - - inode = nd.path.dentry->d_inode; - if (!inode) { - r = -ENOENT; - goto out; - } - - if (!S_ISBLK(inode->i_mode)) { - r = -ENOTBLK; - goto out; - } - - *dev = inode->i_rdev; - - out: - path_put(&nd.path); - return r; + struct block_device *bdev = lookup_bdev(path); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + *dev = bdev->bd_dev; + bdput(bdev); + return 0; } /* @@ -506,14 +489,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_sectors = min_not_zero(rs->max_sectors, q->max_sectors); - /* FIXME: Device-Mapper on top of RAID-0 breaks because DM - * currently doesn't honor MD's merge_bvec_fn routine. - * In this case, we'll force DM to use PAGE_SIZE or - * smaller I/O, just to be safe. A better fix is in the - * works, but add this for the time being so it will at - * least operate correctly. + /* + * Check if merge fn is supported. + * If not we'll force DM to use PAGE_SIZE or + * smaller I/O, just to be safe. */ - if (q->merge_bvec_fn) + + if (q->merge_bvec_fn && !ti->type->merge) rs->max_sectors = min_not_zero(rs->max_sectors, (unsigned int) (PAGE_SIZE >> 9)); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 372369b1cc2..bca448e1187 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,8 +37,8 @@ static DEFINE_SPINLOCK(_minor_lock); struct dm_io { struct mapped_device *md; int error; - struct bio *bio; atomic_t io_count; + struct bio *bio; unsigned long start_time; }; @@ -829,6 +829,49 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) * CRUD END *---------------------------------------------------------------*/ +static int dm_merge_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) +{ + struct mapped_device *md = q->queuedata; + struct dm_table *map = dm_get_table(md); + struct dm_target *ti; + sector_t max_sectors; + int max_size; + + if (unlikely(!map)) + return 0; + + ti = dm_table_find_target(map, bvm->bi_sector); + + /* + * Find maximum amount of I/O that won't need splitting + */ + max_sectors = min(max_io_len(md, bvm->bi_sector, ti), + (sector_t) BIO_MAX_SECTORS); + max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; + if (max_size < 0) + max_size = 0; + + /* + * merge_bvec_fn() returns number of bytes + * it can accept at this offset + * max is precomputed maximal io size + */ + if (max_size && ti->type->merge) + max_size = ti->type->merge(ti, bvm, biovec, max_size); + + /* + * Always allow an entire first page + */ + if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) + max_size = biovec->bv_len; + + dm_table_put(map); + + return max_size; +} + /* * The request function that just remaps the bio built up by * dm_merge_bvec. @@ -1032,6 +1075,7 @@ static struct mapped_device *alloc_dev(int minor) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; + blk_queue_merge_bvec(md->queue, dm_merge_bvec); md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8c03b634e62..1e59a0b0a78 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -100,12 +100,6 @@ int dm_lock_for_deletion(struct mapped_device *md); void dm_kobject_uevent(struct mapped_device *md); -/* - * Dirty log - */ -int dm_dirty_log_init(void); -void dm_dirty_log_exit(void); - int dm_kcopyd_init(void); void dm_kcopyd_exit(void); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1cafaa95944..b1eebf88c20 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -50,17 +50,19 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) /** * linear_mergeable_bvec -- tell bio layer if two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can take at this offset */ -static int linear_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int linear_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; dev_info_t *dev0; - unsigned long maxsectors, bio_sectors = bio->bi_size >> 9; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9; + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); dev0 = which_dev(mddev, sector); maxsectors = (dev0->size << 1) - (sector - (dev0->offset<<1)); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 2f30ebd8b7a..18361063566 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -241,18 +241,20 @@ static int create_strip_zones (mddev_t *mddev) /** * raid0_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can accept at this offset */ -static int raid0_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int raid0_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8674a5f7e70..d41bebb6da0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -442,26 +442,27 @@ static sector_t raid10_find_virt(conf_t *conf, sector_t sector, int dev) /** * raid10_mergeable_bvec -- tell bio layer if a two requests can be merged * @q: request queue - * @bio: the buffer head that's been built up so far + * @bvm: properties of new bio * @biovec: the request that could be merged to it. * * Return amount of bytes we can accept at this offset * If near_copies == raid_disk, there are no striping issues, * but in that case, the function isn't called at all. */ -static int raid10_mergeable_bvec(struct request_queue *q, struct bio *bio, - struct bio_vec *bio_vec) +static int raid10_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; if (max < 0) max = 0; /* bio_add cannot handle a negative return */ - if (max <= bio_vec->bv_len && bio_sectors == 0) - return bio_vec->bv_len; + if (max <= biovec->bv_len && bio_sectors == 0) + return biovec->bv_len; else return max; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b428e15d59a..40e93967565 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3079,15 +3079,17 @@ static int raid5_congested(void *data, int bits) /* We want read requests to align with chunks where possible, * but write requests don't need to. */ -static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int raid5_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; - if (bio_data_dir(bio) == WRITE) + if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; |