aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-03-18 00:37:55 -0700
committerDavid S. Miller <davem@davemloft.net>2008-03-18 00:37:55 -0700
commit577f99c1d08cf9cbdafd4e858dd13ff04d855090 (patch)
tree0f726bbda9b18d311d4c95198bbd96cb7ac01db0 /drivers/md
parent26c0f03f6b77c513cb7bc37b73a06819bdbb791b (diff)
parent2f633928cbba8a5858bb39b11e7219a41b0fbef5 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts: drivers/net/wireless/rt2x00/rt2x00dev.c net/8021q/vlan_dev.c
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c23
-rw-r--r--drivers/md/md.c50
-rw-r--r--drivers/md/raid1.c73
-rw-r--r--drivers/md/raid10.c87
4 files changed, 167 insertions, 66 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7aeceedcf7d..c14dacdacfa 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1045,8 +1045,14 @@ void bitmap_daemon_work(struct bitmap *bitmap)
if (bitmap == NULL)
return;
if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
- return;
+ goto done;
+
bitmap->daemon_lastrun = jiffies;
+ if (bitmap->allclean) {
+ bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+ return;
+ }
+ bitmap->allclean = 1;
for (j = 0; j < bitmap->chunks; j++) {
bitmap_counter_t *bmc;
@@ -1068,8 +1074,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
- if (need_write)
+ if (need_write) {
write_page(bitmap, page, 0);
+ bitmap->allclean = 0;
+ }
continue;
}
@@ -1098,6 +1106,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
/*
if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
*/
+ if (*bmc)
+ bitmap->allclean = 0;
+
if (*bmc == 2) {
*bmc=1; /* maybe clear the bit next time */
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
@@ -1132,6 +1143,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
}
}
+ done:
+ if (bitmap->allclean == 0)
+ bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
}
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1226,6 +1240,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
sectors -= blocks;
else sectors = 0;
}
+ bitmap->allclean = 0;
return 0;
}
@@ -1296,6 +1311,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
}
}
spin_unlock_irq(&bitmap->lock);
+ bitmap->allclean = 0;
return rv;
}
@@ -1332,6 +1348,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab
}
unlock:
spin_unlock_irqrestore(&bitmap->lock, flags);
+ bitmap->allclean = 0;
}
void bitmap_close_sync(struct bitmap *bitmap)
@@ -1399,7 +1416,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
spin_unlock_irq(&bitmap->lock);
-
+ bitmap->allclean = 0;
}
/* dirty the memory and file bits for bitmap chunks "s" to "e" */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7da6ec244e1..ccbbf63727c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1105,7 +1105,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
if (rdev->sb_size & bmask)
- rdev-> sb_size = (rdev->sb_size | bmask)+1;
+ rdev->sb_size = (rdev->sb_size | bmask) + 1;
+
+ if (minor_version
+ && rdev->data_offset < sb_offset + (rdev->sb_size/512))
+ return -EINVAL;
if (sb->level == cpu_to_le32(LEVEL_MULTIPATH))
rdev->desc_nr = -1;
@@ -1137,7 +1141,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
else
ret = 0;
}
- if (minor_version)
+ if (minor_version)
rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2;
else
rdev->size = rdev->sb_offset;
@@ -1499,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev)
free_disk_sb(rdev);
list_del_init(&rdev->same_set);
#ifndef MODULE
- md_autodetect_dev(rdev->bdev->bd_dev);
+ if (test_bit(AutoDetected, &rdev->flags))
+ md_autodetect_dev(rdev->bdev->bd_dev);
#endif
unlock_rdev(rdev);
kobject_put(&rdev->kobj);
@@ -1996,9 +2001,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
char *e;
unsigned long long size = simple_strtoull(buf, &e, 10);
unsigned long long oldsize = rdev->size;
+ mddev_t *my_mddev = rdev->mddev;
+
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
- if (rdev->mddev->pers)
+ if (my_mddev->pers)
return -EBUSY;
rdev->size = size;
if (size > oldsize && rdev->mddev->external) {
@@ -2011,7 +2018,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
int overlap = 0;
struct list_head *tmp, *tmp2;
- mddev_unlock(rdev->mddev);
+ mddev_unlock(my_mddev);
for_each_mddev(mddev, tmp) {
mdk_rdev_t *rdev2;
@@ -2031,7 +2038,7 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
break;
}
}
- mddev_lock(rdev->mddev);
+ mddev_lock(my_mddev);
if (overlap) {
/* Someone else could have slipped in a size
* change here, but doing so is just silly.
@@ -2043,8 +2050,8 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
return -EBUSY;
}
}
- if (size < rdev->mddev->size || rdev->mddev->size == 0)
- rdev->mddev->size = size;
+ if (size < my_mddev->size || my_mddev->size == 0)
+ my_mddev->size = size;
return len;
}
@@ -2065,10 +2072,21 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+ mddev_t *mddev = rdev->mddev;
+ ssize_t rv;
if (!entry->show)
return -EIO;
- return entry->show(rdev, page);
+
+ rv = mddev ? mddev_lock(mddev) : -EBUSY;
+ if (!rv) {
+ if (rdev->mddev == NULL)
+ rv = -EBUSY;
+ else
+ rv = entry->show(rdev, page);
+ mddev_unlock(mddev);
+ }
+ return rv;
}
static ssize_t
@@ -2077,15 +2095,19 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr,
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
- int rv;
+ ssize_t rv;
+ mddev_t *mddev = rdev->mddev;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- rv = mddev_lock(rdev->mddev);
+ rv = mddev ? mddev_lock(mddev): -EBUSY;
if (!rv) {
- rv = entry->store(rdev, page, length);
+ if (rdev->mddev == NULL)
+ rv = -EBUSY;
+ else
+ rv = entry->store(rdev, page, length);
mddev_unlock(rdev->mddev);
}
return rv;
@@ -5127,7 +5149,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
if (mddev->ro==1)
seq_printf(seq, " (read-only)");
if (mddev->ro==2)
- seq_printf(seq, "(auto-read-only)");
+ seq_printf(seq, " (auto-read-only)");
seq_printf(seq, " %s", mddev->pers->name);
}
@@ -5351,6 +5373,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
mddev->ro = 0;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ md_wakeup_thread(mddev->sync_thread);
}
atomic_inc(&mddev->writes_pending);
if (mddev->in_sync) {
@@ -6021,6 +6044,7 @@ static void autostart_arrays(int part)
MD_BUG();
continue;
}
+ set_bit(AutoDetected, &rdev->flags);
list_add(&rdev->same_set, &pending_raid_disks);
i_passed++;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 5c7fef091ce..ff61b309129 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -592,6 +592,37 @@ static int raid1_congested(void *data, int bits)
}
+static int flush_pending_writes(conf_t *conf)
+{
+ /* Any writes that have been queued but are awaiting
+ * bitmap updates get flushed here.
+ * We return 1 if any requests were actually submitted.
+ */
+ int rv = 0;
+
+ spin_lock_irq(&conf->device_lock);
+
+ if (conf->pending_bio_list.head) {
+ struct bio *bio;
+ bio = bio_list_get(&conf->pending_bio_list);
+ blk_remove_plug(conf->mddev->queue);
+ spin_unlock_irq(&conf->device_lock);
+ /* flush any pending bitmap writes to
+ * disk before proceeding w/ I/O */
+ bitmap_unplug(conf->mddev->bitmap);
+
+ while (bio) { /* submit pending writes */
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
+ rv = 1;
+ } else
+ spin_unlock_irq(&conf->device_lock);
+ return rv;
+}
+
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
@@ -673,15 +704,23 @@ static void freeze_array(conf_t *conf)
/* stop syncio and normal IO and wait for everything to
* go quite.
* We increment barrier and nr_waiting, and then
- * wait until barrier+nr_pending match nr_queued+2
+ * wait until nr_pending match nr_queued+1
+ * This is called in the context of one normal IO request
+ * that has failed. Thus any sync request that might be pending
+ * will be blocked by nr_pending, and we need to wait for
+ * pending IO requests to complete or be queued for re-try.
+ * Thus the number queued (nr_queued) plus this request (1)
+ * must match the number of pending IOs (nr_pending) before
+ * we continue.
*/
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier,
- conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
- raid1_unplug(conf->mddev->queue));
+ ({ flush_pending_writes(conf);
+ raid1_unplug(conf->mddev->queue); }));
spin_unlock_irq(&conf->resync_lock);
}
static void unfreeze_array(conf_t *conf)
@@ -907,6 +946,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ /* In case raid1d snuck into freeze_array */
+ wake_up(&conf->wait_barrier);
+
if (do_sync)
md_wakeup_thread(mddev->thread);
#if 0
@@ -1473,28 +1515,14 @@ static void raid1d(mddev_t *mddev)
for (;;) {
char b[BDEVNAME_SIZE];
- spin_lock_irqsave(&conf->device_lock, flags);
-
- if (conf->pending_bio_list.head) {
- bio = bio_list_get(&conf->pending_bio_list);
- blk_remove_plug(mddev->queue);
- spin_unlock_irqrestore(&conf->device_lock, flags);
- /* flush any pending bitmap writes to disk before proceeding w/ I/O */
- bitmap_unplug(mddev->bitmap);
- while (bio) { /* submit pending writes */
- struct bio *next = bio->bi_next;
- bio->bi_next = NULL;
- generic_make_request(bio);
- bio = next;
- }
- unplug = 1;
+ unplug += flush_pending_writes(conf);
- continue;
- }
-
- if (list_empty(head))
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&conf->device_lock, flags);
break;
+ }
r1_bio = list_entry(head->prev, r1bio_t, retry_list);
list_del(head->prev);
conf->nr_queued--;
@@ -1590,7 +1618,6 @@ static void raid1d(mddev_t *mddev)
}
}
}
- spin_unlock_irqrestore(&conf->device_lock, flags);
if (unplug)
unplug_slaves(mddev);
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 017f58113c3..32389d2f18f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -537,7 +537,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
current_distance = abs(r10_bio->devs[slot].addr -
conf->mirrors[disk].head_position);
- /* Find the disk whose head is closest */
+ /* Find the disk whose head is closest,
+ * or - for far > 1 - find the closest to partition beginning */
for (nslot = slot; nslot < conf->copies; nslot++) {
int ndisk = r10_bio->devs[nslot].devnum;
@@ -557,8 +558,13 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
slot = nslot;
break;
}
- new_distance = abs(r10_bio->devs[nslot].addr -
- conf->mirrors[ndisk].head_position);
+
+ /* for far > 1 always use the lowest address */
+ if (conf->far_copies > 1)
+ new_distance = r10_bio->devs[nslot].addr;
+ else
+ new_distance = abs(r10_bio->devs[nslot].addr -
+ conf->mirrors[ndisk].head_position);
if (new_distance < current_distance) {
current_distance = new_distance;
disk = ndisk;
@@ -629,7 +635,36 @@ static int raid10_congested(void *data, int bits)
return ret;
}
-
+static int flush_pending_writes(conf_t *conf)
+{
+ /* Any writes that have been queued but are awaiting
+ * bitmap updates get flushed here.
+ * We return 1 if any requests were actually submitted.
+ */
+ int rv = 0;
+
+ spin_lock_irq(&conf->device_lock);
+
+ if (conf->pending_bio_list.head) {
+ struct bio *bio;
+ bio = bio_list_get(&conf->pending_bio_list);
+ blk_remove_plug(conf->mddev->queue);
+ spin_unlock_irq(&conf->device_lock);
+ /* flush any pending bitmap writes to disk
+ * before proceeding w/ I/O */
+ bitmap_unplug(conf->mddev->bitmap);
+
+ while (bio) { /* submit pending writes */
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
+ rv = 1;
+ } else
+ spin_unlock_irq(&conf->device_lock);
+ return rv;
+}
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
@@ -712,15 +747,23 @@ static void freeze_array(conf_t *conf)
/* stop syncio and normal IO and wait for everything to
* go quiet.
* We increment barrier and nr_waiting, and then
- * wait until barrier+nr_pending match nr_queued+2
+ * wait until nr_pending match nr_queued+1
+ * This is called in the context of one normal IO request
+ * that has failed. Thus any sync request that might be pending
+ * will be blocked by nr_pending, and we need to wait for
+ * pending IO requests to complete or be queued for re-try.
+ * Thus the number queued (nr_queued) plus this request (1)
+ * must match the number of pending IOs (nr_pending) before
+ * we continue.
*/
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier,
- conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
- raid10_unplug(conf->mddev->queue));
+ ({ flush_pending_writes(conf);
+ raid10_unplug(conf->mddev->queue); }));
spin_unlock_irq(&conf->resync_lock);
}
@@ -892,6 +935,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ /* In case raid10d snuck in to freeze_array */
+ wake_up(&conf->wait_barrier);
+
if (do_sync)
md_wakeup_thread(mddev->thread);
@@ -1464,28 +1510,14 @@ static void raid10d(mddev_t *mddev)
for (;;) {
char b[BDEVNAME_SIZE];
- spin_lock_irqsave(&conf->device_lock, flags);
- if (conf->pending_bio_list.head) {
- bio = bio_list_get(&conf->pending_bio_list);
- blk_remove_plug(mddev->queue);
- spin_unlock_irqrestore(&conf->device_lock, flags);
- /* flush any pending bitmap writes to disk before proceeding w/ I/O */
- bitmap_unplug(mddev->bitmap);
-
- while (bio) { /* submit pending writes */
- struct bio *next = bio->bi_next;
- bio->bi_next = NULL;
- generic_make_request(bio);
- bio = next;
- }
- unplug = 1;
-
- continue;
- }
+ unplug += flush_pending_writes(conf);
- if (list_empty(head))
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&conf->device_lock, flags);
break;
+ }
r10_bio = list_entry(head->prev, r10bio_t, retry_list);
list_del(head->prev);
conf->nr_queued--;
@@ -1548,7 +1580,6 @@ static void raid10d(mddev_t *mddev)
}
}
}
- spin_unlock_irqrestore(&conf->device_lock, flags);
if (unplug)
unplug_slaves(mddev);
}
@@ -1787,6 +1818,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (j == conf->copies) {
/* Cannot recover, so abort the recovery */
put_buf(r10_bio);
+ if (rb2)
+ atomic_dec(&rb2->remaining);
r10_bio = rb2;
if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",