aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c136
1 files changed, 99 insertions, 37 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 55e7c56045a..ae16794bef2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -101,6 +101,40 @@
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
#endif
+/*
+ * We maintain a biased count of active stripes in the bottom 16 bits of
+ * bi_phys_segments, and a count of processed stripes in the upper 16 bits
+ */
+static inline int raid5_bi_phys_segments(struct bio *bio)
+{
+ return bio->bi_phys_segments & 0xffff;
+}
+
+static inline int raid5_bi_hw_segments(struct bio *bio)
+{
+ return (bio->bi_phys_segments >> 16) & 0xffff;
+}
+
+static inline int raid5_dec_bi_phys_segments(struct bio *bio)
+{
+ --bio->bi_phys_segments;
+ return raid5_bi_phys_segments(bio);
+}
+
+static inline int raid5_dec_bi_hw_segments(struct bio *bio)
+{
+ unsigned short val = raid5_bi_hw_segments(bio);
+
+ --val;
+ bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
+ return val;
+}
+
+static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
+{
+ bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16);
+}
+
static inline int raid6_next_disk(int disk, int raid_disks)
{
disk++;
@@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
while (rbi && rbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev->sector);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
if (*bip)
bi->bi_next = *bip;
*bip = bi;
- bi->bi_phys_segments ++;
+ bi->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
spin_unlock(&sh->lock);
@@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
md_write_end(conf->mddev);
bi->bi_next = *return_bi;
*return_bi = bi;
@@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
- if (--bi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(bi)) {
bi->bi_next = *return_bi;
*return_bi = bi;
}
@@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf,
while (wbi && wbi->bi_sector <
dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
- if (--wbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(wbi)) {
md_write_end(conf->mddev);
wbi->bi_next = *return_bi;
*return_bi = wbi;
@@ -2507,7 +2541,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
*
*/
-static void handle_stripe5(struct stripe_head *sh)
+static bool handle_stripe5(struct stripe_head *sh)
{
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
@@ -2568,10 +2602,10 @@ static void handle_stripe5(struct stripe_head *sh)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
@@ -2588,8 +2622,14 @@ static void handle_stripe5(struct stripe_head *sh)
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -2717,10 +2757,11 @@ static void handle_stripe5(struct stripe_head *sh)
if (sh->reconstruct_state == reconstruct_state_result) {
sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
- for (i = conf->raid_disks; i--; )
+ for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
- set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
s.locked++;
+ }
}
if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
@@ -2754,9 +2795,11 @@ static void handle_stripe5(struct stripe_head *sh)
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
{
raid6_conf_t *conf = sh->raid_conf;
int disks = sh->disks;
@@ -2805,7 +2848,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
copy_data(0, rbi, dev->page, dev->sector);
rbi2 = r5_next_bio(rbi, dev->sector);
spin_lock_irq(&conf->device_lock);
- if (--rbi->bi_phys_segments == 0) {
+ if (!raid5_dec_bi_phys_segments(rbi)) {
rbi->bi_next = return_bi;
return_bi = rbi;
}
@@ -2829,10 +2872,10 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
if (dev->written)
s.written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ if (blocked_rdev == NULL &&
+ rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
- break;
}
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
/* The ReadError flag will just be confusing now */
@@ -2850,9 +2893,16 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
rcu_read_unlock();
if (unlikely(blocked_rdev)) {
- set_bit(STRIPE_HANDLE, &sh->state);
- goto unlock;
+ if (s.syncing || s.expanding || s.expanded ||
+ s.to_write || s.written) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto unlock;
+ }
+ /* There is nothing for the blocked_rdev to block */
+ rdev_dec_pending(blocked_rdev, conf->mddev);
+ blocked_rdev = NULL;
}
+
pr_debug("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -2967,14 +3017,17 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
ops_run_io(sh, &s);
return_io(return_bi);
+
+ return blocked_rdev == NULL;
}
-static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+/* returns true if the stripe was handled */
+static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
{
if (sh->raid_conf->level == 6)
- handle_stripe6(sh, tmp_page);
+ return handle_stripe6(sh, tmp_page);
else
- handle_stripe5(sh);
+ return handle_stripe5(sh);
}
@@ -3136,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
if(bi) {
conf->retry_read_aligned_list = bi->bi_next;
bi->bi_next = NULL;
+ /*
+ * this sets the active strip count to 1 and the processed
+ * strip count to zero (upper 8 bits)
+ */
bi->bi_phys_segments = 1; /* biased count of active stripes */
- bi->bi_hw_segments = 0; /* count of processed stripes */
}
return bi;
@@ -3187,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi)
if ((bi->bi_size>>9) > q->max_sectors)
return 0;
blk_recount_segments(q, bi);
- if (bi->bi_phys_segments > q->max_phys_segments ||
- bi->bi_hw_segments > q->max_hw_segments)
+ if (bi->bi_phys_segments > q->max_phys_segments)
return 0;
if (q->merge_bvec_fn)
@@ -3332,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
sector_t logical_sector, last_sector;
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
- int remaining;
+ int cpu, remaining;
if (unlikely(bio_barrier(bi))) {
bio_endio(bi, -EOPNOTSUPP);
@@ -3341,8 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi)
md_write_start(mddev, bi);
- disk_stat_inc(mddev->gendisk, ios[rw]);
- disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
+ cpu = part_stat_lock();
+ part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
+ bio_sectors(bi));
+ part_stat_unlock();
if (rw == READ &&
mddev->reshape_position == MaxSector &&
@@ -3449,7 +3507,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
}
spin_lock_irq(&conf->device_lock);
- remaining = --bi->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(bi);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0) {
@@ -3692,7 +3750,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
clear_bit(STRIPE_INSYNC, &sh->state);
spin_unlock(&sh->lock);
- handle_stripe(sh, NULL);
+ /* wait for any blocked device to be handled */
+ while(unlikely(!handle_stripe(sh, NULL)))
+ ;
release_stripe(sh);
return STRIPE_SECTORS;
@@ -3731,7 +3791,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
sector += STRIPE_SECTORS,
scnt++) {
- if (scnt < raid_bio->bi_hw_segments)
+ if (scnt < raid5_bi_hw_segments(raid_bio))
/* already done this stripe */
continue;
@@ -3739,7 +3799,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
if (!sh) {
/* failed to get a stripe - must wait */
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
@@ -3747,7 +3807,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
release_stripe(sh);
- raid_bio->bi_hw_segments = scnt;
+ raid5_set_bi_hw_segments(raid_bio, scnt);
conf->retry_read_aligned = raid_bio;
return handled;
}
@@ -3757,7 +3817,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
handled++;
}
spin_lock_irq(&conf->device_lock);
- remaining = --raid_bio->bi_phys_segments;
+ remaining = raid5_dec_bi_phys_segments(raid_bio);
spin_unlock_irq(&conf->device_lock);
if (remaining == 0)
bio_endio(raid_bio, 0);
@@ -3811,10 +3871,8 @@ static void raid5d(mddev_t *mddev)
sh = __get_priority_stripe(conf);
- if (!sh) {
- async_tx_issue_pending_all();
+ if (!sh)
break;
- }
spin_unlock_irq(&conf->device_lock);
handled++;
@@ -3827,6 +3885,7 @@ static void raid5d(mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
+ async_tx_issue_pending_all();
unplug_slaves(mddev);
pr_debug("--- raid5d inactive\n");
@@ -4439,6 +4498,9 @@ static int raid5_check_reshape(mddev_t *mddev)
return -EINVAL; /* Cannot shrink array or change level yet */
if (mddev->delta_disks == 0)
return 0; /* nothing to do */
+ if (mddev->bitmap)
+ /* Cannot grow a bitmap yet */
+ return -EBUSY;
/* Can only proceed if there are plenty of stripe_heads.
* We need a minimum of one full stripe,, and for sensible progress