X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fraid10.c;h=867f06ae33d944e8ef7178d28fc3fa8905d963b3;hb=fcda46128d5cb50075339b79ce585ab767337e9e;hp=62ebb1bc72be24cc32ecc775c5bd71603d44e5c6;hpb=62778ba1aa2589dc78c36a32edc6f5a6ccaf50c6;p=linux-2.6 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 62ebb1bc72..867f06ae33 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -47,7 +47,7 @@ static void unplug_slaves(mddev_t *mddev); -static void * r10bio_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; r10bio_t *r10_bio; @@ -81,7 +81,7 @@ static void r10bio_pool_free(void *r10_bio, void *data) * one for write (we recover only one drive per r10buf) * */ -static void * r10buf_pool_alloc(unsigned int __nocast gfp_flags, void *data) +static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) { conf_t *conf = data; struct page *page; @@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) int disk, slot, nslot; const int sectors = r10_bio->sectors; sector_t new_distance, current_distance; + mdk_rdev_t *rdev; raid10_find_phys(conf, r10_bio); rcu_read_lock(); @@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) slot = 0; disk = r10_bio->devs[slot].devnum; - while (!conf->mirrors[disk].rdev || - !conf->mirrors[disk].rdev->in_sync) { + while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + !test_bit(In_sync, &rdev->flags)) { slot++; if (slot == conf->copies) { slot = 0; @@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) /* make sure the disk is operational */ slot = 0; disk = r10_bio->devs[slot].devnum; - while (!conf->mirrors[disk].rdev || - !conf->mirrors[disk].rdev->in_sync) { + while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL || + !test_bit(In_sync, &rdev->flags)) { slot ++; if (slot == conf->copies) { disk = -1; @@ -538,7 +539,8 @@ 
static int read_balance(conf_t *conf, r10bio_t *r10_bio) } - current_distance = abs(this_sector - conf->mirrors[disk].head_position); + current_distance = abs(r10_bio->devs[slot].addr - + conf->mirrors[disk].head_position); /* Find the disk whose head is closest */ @@ -546,11 +548,11 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) int ndisk = r10_bio->devs[nslot].devnum; - if (!conf->mirrors[ndisk].rdev || - !conf->mirrors[ndisk].rdev->in_sync) + if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL || + !test_bit(In_sync, &rdev->flags)) continue; - if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) { + if (!atomic_read(&rdev->nr_pending)) { disk = ndisk; slot = nslot; break; @@ -568,7 +570,7 @@ rb_out: r10_bio->read_slot = slot; /* conf->next_seq_sect = this_sector + sectors;*/ - if (disk >= 0 && conf->mirrors[disk].rdev) + if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL) atomic_inc(&conf->mirrors[disk].rdev->nr_pending); rcu_read_unlock(); @@ -582,8 +584,8 @@ static void unplug_slaves(mddev_t *mddev) rcu_read_lock(); for (i=0; i<mddev->raid_disks; i++) { - mdk_rdev_t *rdev = conf->mirrors[i].rdev; - if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) { + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) { request_queue_t *r_queue = bdev_get_queue(rdev->bdev); atomic_inc(&rdev->nr_pending); @@ -613,8 +615,8 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, rcu_read_lock(); for (i=0; i<mddev->raid_disks && ret == 0; i++) { - mdk_rdev_t *rdev = conf->mirrors[i].rdev; - if (rdev && !rdev->faulty) { + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags)) { struct block_device *bdev = rdev->bdev; request_queue_t *r_queue = bdev_get_queue(bdev); @@ -667,6 +669,12 @@ static int make_request(request_queue_t *q, struct bio * bio) struct bio *read_bio; int i; int 
chunk_sects = conf->chunk_mask + 1; + const int rw = bio_data_dir(bio); + + if (unlikely(bio_barrier(bio))) { + bio_endio(bio, bio->bi_size, -EOPNOTSUPP); + return 0; + } /* If this request crosses a chunk boundary, we need to * split it. This will only happen for 1 PAGE (or less) requests. @@ -712,13 +720,8 @@ static int make_request(request_queue_t *q, struct bio * bio) conf->nr_pending++; spin_unlock_irq(&conf->resync_lock); - if (bio_data_dir(bio)==WRITE) { - disk_stat_inc(mddev->gendisk, writes); - disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio)); - } else { - disk_stat_inc(mddev->gendisk, reads); - disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio)); - } + disk_stat_inc(mddev->gendisk, ios[rw]); + disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); @@ -728,7 +731,7 @@ static int make_request(request_queue_t *q, struct bio * bio) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_sector; - if (bio_data_dir(bio) == READ) { + if (rw == READ) { /* * read balancing logic: */ @@ -766,9 +769,10 @@ static int make_request(request_queue_t *q, struct bio * bio) rcu_read_lock(); for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; - if (conf->mirrors[d].rdev && - !conf->mirrors[d].rdev->faulty) { - atomic_inc(&conf->mirrors[d].rdev->nr_pending); + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); + if (rdev && + !test_bit(Faulty, &rdev->flags)) { + atomic_inc(&rdev->nr_pending); r10_bio->devs[i].bio = bio; } else r10_bio->devs[i].bio = NULL; @@ -822,7 +826,7 @@ static void status(struct seq_file *seq, mddev_t *mddev) for (i = 0; i < conf->raid_disks; i++) seq_printf(seq, "%s", conf->mirrors[i].rdev && - conf->mirrors[i].rdev->in_sync ? "U" : "_"); + test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); seq_printf(seq, "]"); } @@ -837,7 +841,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * next level up know. 
* else mark the drive as failed */ - if (rdev->in_sync + if (test_bit(In_sync, &rdev->flags) && conf->working_disks == 1) /* * Don't fail the drive, just return an IO error. @@ -847,7 +851,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * really dead" tests... */ return; - if (rdev->in_sync) { + if (test_bit(In_sync, &rdev->flags)) { mddev->degraded++; conf->working_disks--; /* @@ -855,8 +859,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_ERR, &mddev->recovery); } - rdev->in_sync = 0; - rdev->faulty = 1; + clear_bit(In_sync, &rdev->flags); + set_bit(Faulty, &rdev->flags); mddev->sb_dirty = 1; printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" " Operation continuing on %d devices\n", @@ -881,7 +885,8 @@ static void print_conf(conf_t *conf) tmp = conf->mirrors + i; if (tmp->rdev) printk(" disk %d, wo:%d, o:%d, dev:%s\n", - i, !tmp->rdev->in_sync, !tmp->rdev->faulty, + i, !test_bit(In_sync, &tmp->rdev->flags), + !test_bit(Faulty, &tmp->rdev->flags), bdevname(tmp->rdev->bdev,b)); } } @@ -900,6 +905,27 @@ static void close_sync(conf_t *conf) conf->r10buf_pool = NULL; } +/* check if there are enough drives for + * every block to appear on atleast one + */ +static int enough(conf_t *conf) +{ + int first = 0; + + do { + int n = conf->copies; + int cnt = 0; + while (n--) { + if (conf->mirrors[first].rdev) + cnt++; + first = (first+1) % conf->raid_disks; + } + if (cnt == 0) + return 0; + } while (first != 0); + return 1; +} + static int raid10_spare_active(mddev_t *mddev) { int i; @@ -913,11 +939,11 @@ static int raid10_spare_active(mddev_t *mddev) for (i = 0; i < conf->raid_disks; i++) { tmp = conf->mirrors + i; if (tmp->rdev - && !tmp->rdev->faulty - && !tmp->rdev->in_sync) { + && !test_bit(Faulty, &tmp->rdev->flags) + && !test_bit(In_sync, &tmp->rdev->flags)) { conf->working_disks++; mddev->degraded--; - tmp->rdev->in_sync = 1; + set_bit(In_sync, &tmp->rdev->flags); } } @@ -938,6 +964,8 @@ static int 
raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * very different from resync */ return 0; + if (!enough(conf)) + return 0; for (mirror=0; mirror < mddev->raid_disks; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { @@ -955,7 +983,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) p->head_position = 0; rdev->raid_disk = mirror; found = 1; - p->rdev = rdev; + rcu_assign_pointer(p->rdev, rdev); break; } @@ -973,7 +1001,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number) print_conf(conf); rdev = p->rdev; if (rdev) { - if (rdev->in_sync || + if (test_bit(In_sync, &rdev->flags) || atomic_read(&rdev->nr_pending)) { err = -EBUSY; goto abort; @@ -1389,7 +1417,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0 ; i<conf->raid_disks; i++) if (conf->mirrors[i].rdev && - !conf->mirrors[i].rdev->in_sync) { + !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) { /* want to reconstruct this device */ r10bio_t *rb2 = r10_bio; @@ -1410,7 +1438,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (j=0; j<conf->copies;j++) { int d = r10_bio->devs[j].devnum; if (conf->mirrors[d].rdev && - conf->mirrors[d].rdev->in_sync) { + test_bit(In_sync, &conf->mirrors[d].rdev->flags)) { /* This is where we read from */ bio = r10_bio->devs[0].bio; bio->bi_next = biolist; @@ -1445,7 +1473,13 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i } } if (j == conf->copies) { - BUG(); + /* Cannot recover, so abort the recovery */ + put_buf(r10_bio); + r10_bio = rb2; + if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery)) + printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n", + mdname(mddev)); + break; } } if (biolist == NULL) { @@ -1480,7 +1514,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio = r10_bio->devs[i].bio; bio->bi_end_io = NULL; if (conf->mirrors[d].rdev == NULL || - conf->mirrors[d].rdev->faulty) + 
test_bit(Faulty, &conf->mirrors[d].rdev->flags)) continue; atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); @@ -1666,7 +1700,7 @@ static int run(mddev_t *mddev) mddev->queue->max_sectors = (PAGE_SIZE>>9); disk->head_position = 0; - if (!rdev->faulty && rdev->in_sync) + if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags)) conf->working_disks++; } conf->raid_disks = mddev->raid_disks; @@ -1678,9 +1712,10 @@ static int run(mddev_t *mddev) init_waitqueue_head(&conf->wait_idle); init_waitqueue_head(&conf->wait_resume); - if (!conf->working_disks) { - printk(KERN_ERR "raid10: no operational mirrors for %s\n", - mdname(mddev)); + /* need to check that every block has at least one working mirror */ + if (!enough(conf)) { + printk(KERN_ERR "raid10: not enough operational mirrors for %s\n", + mdname(mddev)); goto out_free_conf; }