Diffstat (limited to 'drivers/md/dm-snap.c')
-rw-r--r--  drivers/md/dm-snap.c  351
1 file changed, 179 insertions(+), 172 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 1d0fafda0f7..5281e009407 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -39,6 +39,9 @@
*/
#define SNAPSHOT_PAGES 256
+struct workqueue_struct *ksnapd;
+static void flush_queued_bios(void *data);
+
struct pending_exception {
struct exception e;
@@ -56,7 +59,7 @@ struct pending_exception {
/*
* The primary pending_exception is the one that holds
- * the sibling_count and the list of origin_bios for a
+ * the ref_count and the list of origin_bios for a
* group of pending_exceptions. It is always last to get freed.
* These fields get set up when writing to the origin.
*/
@@ -69,7 +72,7 @@ struct pending_exception {
* the sibling concerned and not pe->primary_pe->snap->lock unless
* they are the same.
*/
- atomic_t sibling_count;
+ atomic_t ref_count;
/* Pointer back to snapshot context */
struct dm_snapshot *snap;
@@ -387,15 +390,46 @@ static inline ulong round_up(ulong n, ulong size)
return (n + size) & ~size;
}
-static void read_snapshot_metadata(struct dm_snapshot *s)
+static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
+ char **error)
{
- if (s->store.read_metadata(&s->store)) {
- down_write(&s->lock);
- s->valid = 0;
- up_write(&s->lock);
+ unsigned long chunk_size;
+ char *value;
+
+ chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
+ if (*chunk_size_arg == '\0' || *value != '\0') {
+ *error = "Invalid chunk size";
+ return -EINVAL;
+ }
+
+ if (!chunk_size) {
+ s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
+ return 0;
+ }
+
+ /*
+ * Chunk size must be multiple of page size. Silently
+ * round up if it's not.
+ */
+ chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
+
+ /* Check chunk_size is a power of 2 */
+ if (chunk_size & (chunk_size - 1)) {
+ *error = "Chunk size is not a power of 2";
+ return -EINVAL;
+ }
- dm_table_event(s->table);
+ /* Validate the chunk size against the device block size */
+ if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
+ *error = "Chunk size is not a multiple of device blocksize";
+ return -EINVAL;
}
+
+ s->chunk_size = chunk_size;
+ s->chunk_mask = chunk_size - 1;
+ s->chunk_shift = ffs(chunk_size) - 1;
+
+ return 0;
}
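
The arithmetic in set_chunk_size() is terse, so here is a standalone
userspace sketch of the same three steps (hypothetical code, not part
of the patch; assumes 512-byte sectors and 4 KiB pages): round the
sector count up to a whole number of pages, reject values that are not
a power of 2, then derive the mask and shift.

/* Minimal sketch of the chunk-size checks above; illustrative only. */
#include <stdio.h>
#include <strings.h>	/* ffs() */

#define SECTORS_PER_PAGE (4096 >> 9)	/* 8 x 512-byte sectors per 4 KiB page */

/* Same trick as the kernel's round_up(): size must be a power of 2. */
static unsigned long round_up_pow2(unsigned long n, unsigned long size)
{
	size--;
	return (n + size) & ~size;
}

int main(void)
{
	unsigned long chunk = 12;	/* sectors, as parsed from argv[3] */

	chunk = round_up_pow2(chunk, SECTORS_PER_PAGE);	/* 12 -> 16 */

	if (chunk & (chunk - 1)) {	/* nonzero iff more than one bit set */
		fprintf(stderr, "Chunk size is not a power of 2\n");
		return 1;
	}

	/* chunk_shift as the patch computes it: log2(chunk) */
	printf("chunk=%lu mask=%lu shift=%d\n",
	       chunk, chunk - 1, ffs((int)chunk) - 1);
	return 0;
}
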
/*
@@ -404,15 +438,12 @@ static void read_snapshot_metadata(struct dm_snapshot *s)
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct dm_snapshot *s;
- unsigned long chunk_size;
int r = -EINVAL;
char persistent;
char *origin_path;
char *cow_path;
- char *value;
- int blocksize;
- if (argc < 4) {
+ if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
goto bad1;
@@ -428,13 +459,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad1;
}
- chunk_size = simple_strtoul(argv[3], &value, 10);
- if (chunk_size == 0 || value == NULL) {
- ti->error = "Invalid chunk size";
- r = -EINVAL;
- goto bad1;
- }
-
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s == NULL) {
ti->error = "Cannot allocate snapshot context private "
@@ -457,36 +481,17 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad2;
}
- /*
- * Chunk size must be multiple of page size. Silently
- * round up if it's not.
- */
- chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
- /* Validate the chunk size against the device block size */
- blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
- if (chunk_size % (blocksize >> 9)) {
- ti->error = "Chunk size is not a multiple of device blocksize";
- r = -EINVAL;
- goto bad3;
- }
-
- /* Check chunk_size is a power of 2 */
- if (chunk_size & (chunk_size - 1)) {
- ti->error = "Chunk size is not a power of 2";
- r = -EINVAL;
+ r = set_chunk_size(s, argv[3], &ti->error);
+ if (r)
goto bad3;
- }
- s->chunk_size = chunk_size;
- s->chunk_mask = chunk_size - 1;
s->type = persistent;
- s->chunk_shift = ffs(chunk_size) - 1;
s->valid = 1;
s->active = 0;
s->last_percent = 0;
init_rwsem(&s->lock);
+ spin_lock_init(&s->pe_lock);
s->table = ti->table;
/* Allocate hash table for COW data */
@@ -496,16 +501,12 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad3;
}
- /*
- * Check the persistent flag - done here because we need the iobuf
- * to check the LV header
- */
s->store.snap = s;
if (persistent == 'P')
- r = dm_create_persistent(&s->store, chunk_size);
+ r = dm_create_persistent(&s->store);
else
- r = dm_create_transient(&s->store, s, blocksize);
+ r = dm_create_transient(&s->store);
if (r) {
ti->error = "Couldn't create exception store";
@@ -520,7 +521,14 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
/* Metadata must only be loaded into one table at once */
- read_snapshot_metadata(s);
+ r = s->store.read_metadata(&s->store);
+ if (r) {
+ ti->error = "Failed to read snapshot metadata";
+ goto bad6;
+ }
+
+ bio_list_init(&s->queued_bios);
+ INIT_WORK(&s->queued_bios_work, flush_queued_bios, s);
/* Add snapshot to the list of snapshots for this origin */
/* Exceptions aren't triggered till snapshot_resume() is called */
@@ -560,6 +568,8 @@ static void snapshot_dtr(struct dm_target *ti)
{
struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+ flush_workqueue(ksnapd);
+
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
@@ -593,6 +603,19 @@ static void flush_bios(struct bio *bio)
}
}
+static void flush_queued_bios(void *data)
+{
+ struct dm_snapshot *s = (struct dm_snapshot *) data;
+ struct bio *queued_bios;
+ unsigned long flags;
+
+ spin_lock_irqsave(&s->pe_lock, flags);
+ queued_bios = bio_list_get(&s->queued_bios);
+ spin_unlock_irqrestore(&s->pe_lock, flags);
+
+ flush_bios(queued_bios);
+}
+
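
flush_queued_bios() uses a common drain pattern: hold the lock only
long enough to detach the entire list, then process the detached bios
with the lock released. A minimal userspace analogue (hypothetical
names and types; a pthread mutex stands in for the spinlock and a
hand-rolled list for bio_list, which, unlike this LIFO toy, preserves
FIFO order):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { int id; struct node *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *queued;	/* plays the role of s->queued_bios */

static void queue_item(int id)
{
	struct node *n = malloc(sizeof(*n));

	n->id = id;
	pthread_mutex_lock(&lock);	/* cf. spin_lock_irqsave() */
	n->next = queued;
	queued = n;
	pthread_mutex_unlock(&lock);
}

static void flush_queued(void)
{
	struct node *list, *next;

	pthread_mutex_lock(&lock);
	list = queued;			/* detach everything at once, */
	queued = NULL;			/* cf. bio_list_get()         */
	pthread_mutex_unlock(&lock);

	while (list) {			/* process with the lock dropped */
		next = list->next;
		printf("flushing item %d\n", list->id);
		free(list);
		list = next;
	}
}

int main(void)
{
	queue_item(1);
	queue_item(2);
	flush_queued();
	return 0;
}
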
/*
* Error a list of buffers.
*/
@@ -608,28 +631,7 @@ static void error_bios(struct bio *bio)
}
}
-static inline void error_snapshot_bios(struct pending_exception *pe)
-{
- error_bios(bio_list_get(&pe->snapshot_bios));
-}
-
-static struct bio *__flush_bios(struct pending_exception *pe)
-{
- /*
- * If this pe is involved in a write to the origin and
- * it is the last sibling to complete then release
- * the bios for the original write to the origin.
- */
-
- if (pe->primary_pe &&
- atomic_dec_and_test(&pe->primary_pe->sibling_count))
- return bio_list_get(&pe->primary_pe->origin_bios);
-
- return NULL;
-}
-
-static void __invalidate_snapshot(struct dm_snapshot *s,
- struct pending_exception *pe, int err)
+static void __invalidate_snapshot(struct dm_snapshot *s, int err)
{
if (!s->valid)
return;
@@ -639,9 +641,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s,
else if (err == -ENOMEM)
DMERR("Invalidating snapshot: Unable to allocate exception.");
- if (pe)
- remove_exception(&pe->e);
-
if (s->store.drop_snapshot)
s->store.drop_snapshot(&s->store);
@@ -650,78 +649,95 @@ static void __invalidate_snapshot(struct dm_snapshot *s,
dm_table_event(s->table);
}
+static void get_pending_exception(struct pending_exception *pe)
+{
+ atomic_inc(&pe->ref_count);
+}
+
+static struct bio *put_pending_exception(struct pending_exception *pe)
+{
+ struct pending_exception *primary_pe;
+ struct bio *origin_bios = NULL;
+
+ primary_pe = pe->primary_pe;
+
+ /*
+ * If this pe is involved in a write to the origin and
+ * it is the last sibling to complete then release
+ * the bios for the original write to the origin.
+ */
+ if (primary_pe &&
+ atomic_dec_and_test(&primary_pe->ref_count))
+ origin_bios = bio_list_get(&primary_pe->origin_bios);
+
+ /*
+ * Free the pe if it's not linked to an origin write or if
+ * it's not itself a primary pe.
+ */
+ if (!primary_pe || primary_pe != pe)
+ free_pending_exception(pe);
+
+ /*
+ * Free the primary pe if nothing references it.
+ */
+ if (primary_pe && !atomic_read(&primary_pe->ref_count))
+ free_pending_exception(primary_pe);
+
+ return origin_bios;
+}
+
static void pending_complete(struct pending_exception *pe, int success)
{
struct exception *e;
- struct pending_exception *primary_pe;
struct dm_snapshot *s = pe->snap;
- struct bio *flush = NULL;
+ struct bio *origin_bios = NULL;
+ struct bio *snapshot_bios = NULL;
+ int error = 0;
if (!success) {
/* Read/write error - snapshot is unusable */
down_write(&s->lock);
- __invalidate_snapshot(s, pe, -EIO);
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
- error_snapshot_bios(pe);
+ __invalidate_snapshot(s, -EIO);
+ error = 1;
goto out;
}
e = alloc_exception();
if (!e) {
down_write(&s->lock);
- __invalidate_snapshot(s, pe, -ENOMEM);
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
- error_snapshot_bios(pe);
+ __invalidate_snapshot(s, -ENOMEM);
+ error = 1;
goto out;
}
*e = pe->e;
- /*
- * Add a proper exception, and remove the
- * in-flight exception from the list.
- */
down_write(&s->lock);
if (!s->valid) {
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
free_exception(e);
-
- error_snapshot_bios(pe);
+ error = 1;
goto out;
}
+ /*
+ * Add a proper exception, and remove the
+ * in-flight exception from the list.
+ */
insert_exception(&s->complete, e);
+
+ out:
remove_exception(&pe->e);
- flush = __flush_bios(pe);
+ snapshot_bios = bio_list_get(&pe->snapshot_bios);
+ origin_bios = put_pending_exception(pe);
up_write(&s->lock);
/* Submit any pending write bios */
- flush_bios(bio_list_get(&pe->snapshot_bios));
-
- out:
- primary_pe = pe->primary_pe;
-
- /*
- * Free the pe if it's not linked to an origin write or if
- * it's not itself a primary pe.
- */
- if (!primary_pe || primary_pe != pe)
- free_pending_exception(pe);
-
- /*
- * Free the primary pe if nothing references it.
- */
- if (primary_pe && !atomic_read(&primary_pe->sibling_count))
- free_pending_exception(primary_pe);
+ if (error)
+ error_bios(snapshot_bios);
+ else
+ flush_bios(snapshot_bios);
- if (flush)
- flush_bios(flush);
+ flush_bios(origin_bios);
}
static void commit_callback(void *context, int success)
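
The get/put pair above turns sibling_count into conventional reference
counting: the count starts at 0, __find_pending_exception() takes the
first reference, each sibling takes one more on the primary_pe, and
completion drops them. A toy model of the same discipline (hypothetical
pe type; C11 atomics in place of the kernel's atomic_t, and without the
primary/non-primary split of the real put_pending_exception()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pe {
	atomic_int ref_count;
};

static struct pe *alloc_pe(void)
{
	struct pe *pe = malloc(sizeof(*pe));

	atomic_init(&pe->ref_count, 0);	/* no users yet, cf. atomic_set(.., 0) */
	return pe;
}

static void get_pe(struct pe *pe)
{
	atomic_fetch_add(&pe->ref_count, 1);
}

static void put_pe(struct pe *pe)
{
	/* fetch_sub returns the old value: 1 means we were the last user */
	if (atomic_fetch_sub(&pe->ref_count, 1) == 1) {
		printf("last reference dropped, freeing\n");
		free(pe);
	}
}

int main(void)
{
	struct pe *pe = alloc_pe();

	get_pe(pe);	/* __find_pending_exception()'s reference */
	get_pe(pe);	/* a sibling's reference to its primary_pe */
	put_pe(pe);
	put_pe(pe);	/* count hits zero: freed */
	return 0;
}
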
@@ -822,7 +838,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
pe->primary_pe = NULL;
- atomic_set(&pe->sibling_count, 1);
+ atomic_set(&pe->ref_count, 0);
pe->snap = s;
pe->started = 0;
@@ -831,6 +847,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
return NULL;
}
+ get_pending_exception(pe);
insert_exception(&s->pending, &pe->e);
out:
@@ -850,7 +867,6 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
{
struct exception *e;
struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
- int copy_needed = 0;
int r = 1;
chunk_t chunk;
struct pending_exception *pe = NULL;
@@ -865,32 +881,31 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
if (unlikely(bio_barrier(bio)))
return -EOPNOTSUPP;
+ /* FIXME: should only take write lock if we need
+ * to copy an exception */
+ down_write(&s->lock);
+
+ if (!s->valid) {
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ /* If the block is already remapped - use that, else remap it */
+ e = lookup_exception(&s->complete, chunk);
+ if (e) {
+ remap_exception(s, e, bio);
+ goto out_unlock;
+ }
+
/*
* Write to snapshot - higher level takes care of RW/RO
* flags so we should only get this if we are
* writeable.
*/
if (bio_rw(bio) == WRITE) {
-
- /* FIXME: should only take write lock if we need
- * to copy an exception */
- down_write(&s->lock);
-
- if (!s->valid) {
- r = -EIO;
- goto out_unlock;
- }
-
- /* If the block is already remapped - use that, else remap it */
- e = lookup_exception(&s->complete, chunk);
- if (e) {
- remap_exception(s, e, bio);
- goto out_unlock;
- }
-
pe = __find_pending_exception(s, bio);
if (!pe) {
- __invalidate_snapshot(s, pe, -ENOMEM);
+ __invalidate_snapshot(s, -ENOMEM);
r = -EIO;
goto out_unlock;
}
@@ -898,45 +913,27 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
remap_exception(s, &pe->e, bio);
bio_list_add(&pe->snapshot_bios, bio);
+ r = 0;
+
if (!pe->started) {
/* this is protected by snap->lock */
pe->started = 1;
- copy_needed = 1;
- }
-
- r = 0;
-
- out_unlock:
- up_write(&s->lock);
-
- if (copy_needed)
+ up_write(&s->lock);
start_copy(pe);
- } else {
+ goto out;
+ }
+ } else
/*
* FIXME: this read path scares me because we
* always use the origin when we have a pending
* exception. However I can't think of a
* situation where this is wrong - ejt.
*/
+ bio->bi_bdev = s->origin->bdev;
- /* Do reads */
- down_read(&s->lock);
-
- if (!s->valid) {
- up_read(&s->lock);
- return -EIO;
- }
-
- /* See if it it has been remapped */
- e = lookup_exception(&s->complete, chunk);
- if (e)
- remap_exception(s, e, bio);
- else
- bio->bi_bdev = s->origin->bdev;
-
- up_read(&s->lock);
- }
-
+ out_unlock:
+ up_write(&s->lock);
+ out:
return r;
}
@@ -1025,7 +1022,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
* is already remapped in this snapshot
* and trigger an exception if not.
*
- * sibling_count is initialised to 1 so pending_complete()
+ * ref_count is initialised to 1 so pending_complete()
* won't destroy the primary_pe while we're inside this loop.
*/
e = lookup_exception(&snap->complete, chunk);
@@ -1034,7 +1031,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
pe = __find_pending_exception(snap, bio);
if (!pe) {
- __invalidate_snapshot(snap, pe, ENOMEM);
+ __invalidate_snapshot(snap, -ENOMEM);
goto next_snapshot;
}
@@ -1056,8 +1053,8 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
}
if (!pe->primary_pe) {
- atomic_inc(&primary_pe->sibling_count);
pe->primary_pe = primary_pe;
+ get_pending_exception(primary_pe);
}
if (!pe->started) {
@@ -1070,20 +1067,20 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
}
if (!primary_pe)
- goto out;
+ return r;
/*
* If this is the first time we're processing this chunk and
- * sibling_count is now 1 it means all the pending exceptions
+ * ref_count is now 1 it means all the pending exceptions
* got completed while we were in the loop above, so it falls to
* us here to remove the primary_pe and submit any origin_bios.
*/
- if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+ if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
flush_bios(bio_list_get(&primary_pe->origin_bios));
free_pending_exception(primary_pe);
/* If we got here, pe_queue is necessarily empty. */
- goto out;
+ return r;
}
/*
@@ -1092,7 +1089,6 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
start_copy(pe);
- out:
return r;
}
@@ -1205,7 +1201,7 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
static struct target_type origin_target = {
.name = "snapshot-origin",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = origin_ctr,
.dtr = origin_dtr,
@@ -1216,7 +1212,7 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
@@ -1275,8 +1271,17 @@ static int __init dm_snapshot_init(void)
goto bad5;
}
+ ksnapd = create_singlethread_workqueue("ksnapd");
+ if (!ksnapd) {
+ DMERR("Failed to create ksnapd workqueue.");
+ r = -ENOMEM;
+ goto bad6;
+ }
+
return 0;
+ bad6:
+ mempool_destroy(pending_pool);
bad5:
kmem_cache_destroy(pending_cache);
bad4:
@@ -1294,6 +1299,8 @@ static void __exit dm_snapshot_exit(void)
{
int r;
+ destroy_workqueue(ksnapd);
+
r = dm_unregister_target(&snapshot_target);
if (r)
DMERR("snapshot unregister failed %d", r);
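
For reference, the ksnapd lifecycle this patch adds follows the
standard workqueue pattern of the era: create_singlethread_workqueue()
at module init, queue_work() from completion context, flush_workqueue()
before teardown, and destroy_workqueue() at module exit. A hypothetical
minimal module using the same calls (sketch only, not from the patch;
uses the pre-2.6.20 three-argument INIT_WORK() that the patch itself
relies on):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *wq;
static struct work_struct work;

static void work_fn(void *data)
{
	printk(KERN_INFO "deferred work ran, data=%p\n", data);
}

static int __init example_init(void)
{
	wq = create_singlethread_workqueue("example");
	if (!wq)
		return -ENOMEM;

	INIT_WORK(&work, work_fn, NULL);	/* old 3-argument form */
	queue_work(wq, &work);
	return 0;
}

static void __exit example_exit(void)
{
	flush_workqueue(wq);	/* wait for queued work, as snapshot_dtr() does */
	destroy_workqueue(wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");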