static void unplug_slaves(mddev_t *mddev);
-static void * r10bio_pool_alloc(unsigned int __nocast gfp_flags, void *data)
+static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
conf_t *conf = data;
r10bio_t *r10_bio;
* one for write (we recover only one drive per r10buf)
*
*/
-static void * r10buf_pool_alloc(unsigned int __nocast gfp_flags, void *data)
+static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
{
conf_t *conf = data;
struct page *page;
int disk, slot, nslot;
const int sectors = r10_bio->sectors;
sector_t new_distance, current_distance;
+ mdk_rdev_t *rdev;
raid10_find_phys(conf, r10_bio);
rcu_read_lock();
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ !test_bit(In_sync, &rdev->flags)) {
slot++;
if (slot == conf->copies) {
slot = 0;
/* make sure the disk is operational */
slot = 0;
disk = r10_bio->devs[slot].devnum;
- while (!conf->mirrors[disk].rdev ||
- !conf->mirrors[disk].rdev->in_sync) {
+ while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ !test_bit(In_sync, &rdev->flags)) {
slot ++;
if (slot == conf->copies) {
disk = -1;
}
- current_distance = abs(this_sector - conf->mirrors[disk].head_position);
+ current_distance = abs(r10_bio->devs[slot].addr -
+ conf->mirrors[disk].head_position);
/* Find the disk whose head is closest */
int ndisk = r10_bio->devs[nslot].devnum;
- if (!conf->mirrors[ndisk].rdev ||
- !conf->mirrors[ndisk].rdev->in_sync)
+ if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+ !test_bit(In_sync, &rdev->flags))
continue;
- if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+ if (!atomic_read(&rdev->nr_pending)) {
disk = ndisk;
slot = nslot;
break;
r10_bio->read_slot = slot;
/* conf->next_seq_sect = this_sector + sectors;*/
- if (disk >= 0 && conf->mirrors[disk].rdev)
+ if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
rcu_read_unlock();
rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
- if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
atomic_inc(&rdev->nr_pending);
rcu_read_lock();
for (i=0; i<mddev->raid_disks && ret == 0; i++) {
- mdk_rdev_t *rdev = conf->mirrors[i].rdev;
- if (rdev && !rdev->faulty) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct block_device *bdev = rdev->bdev;
request_queue_t *r_queue = bdev_get_queue(bdev);
struct bio *read_bio;
int i;
int chunk_sects = conf->chunk_mask + 1;
+ const int rw = bio_data_dir(bio);
+
+ if (unlikely(bio_barrier(bio))) {
+ bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+ return 0;
+ }
/* If this request crosses a chunk boundary, we need to
* split it. This will only happen for 1 PAGE (or less) requests.
conf->nr_pending++;
spin_unlock_irq(&conf->resync_lock);
- if (bio_data_dir(bio)==WRITE) {
- disk_stat_inc(mddev->gendisk, writes);
- disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio));
- } else {
- disk_stat_inc(mddev->gendisk, reads);
- disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio));
- }
+ disk_stat_inc(mddev->gendisk, ios[rw]);
+ disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector;
- if (bio_data_dir(bio) == READ) {
+ if (rw == READ) {
/*
* read balancing logic:
*/
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
- if (conf->mirrors[d].rdev &&
- !conf->mirrors[d].rdev->faulty) {
- atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (rdev &&
+ !test_bit(Faulty, &rdev->flags)) {
+ atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else
r10_bio->devs[i].bio = NULL;
for (i = 0; i < conf->raid_disks; i++)
seq_printf(seq, "%s",
conf->mirrors[i].rdev &&
- conf->mirrors[i].rdev->in_sync ? "U" : "_");
+ test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
seq_printf(seq, "]");
}
* next level up know.
* else mark the drive as failed
*/
- if (rdev->in_sync
+ if (test_bit(In_sync, &rdev->flags)
&& conf->working_disks == 1)
/*
* Don't fail the drive, just return an IO error.
* really dead" tests...
*/
return;
- if (rdev->in_sync) {
+ if (test_bit(In_sync, &rdev->flags)) {
mddev->degraded++;
conf->working_disks--;
/*
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- rdev->in_sync = 0;
- rdev->faulty = 1;
+ clear_bit(In_sync, &rdev->flags);
+ set_bit(Faulty, &rdev->flags);
mddev->sb_dirty = 1;
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n",
tmp = conf->mirrors + i;
if (tmp->rdev)
printk(" disk %d, wo:%d, o:%d, dev:%s\n",
- i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
+ i, !test_bit(In_sync, &tmp->rdev->flags),
+ !test_bit(Faulty, &tmp->rdev->flags),
bdevname(tmp->rdev->bdev,b));
}
}
conf->r10buf_pool = NULL;
}
+/* check if there are enough drives for
+ * every block to appear on atleast one
+ */
+static int enough(conf_t *conf)
+{
+ int first = 0;
+
+ do {
+ int n = conf->copies;
+ int cnt = 0;
+ while (n--) {
+ if (conf->mirrors[first].rdev)
+ cnt++;
+ first = (first+1) % conf->raid_disks;
+ }
+ if (cnt == 0)
+ return 0;
+ } while (first != 0);
+ return 1;
+}
+
static int raid10_spare_active(mddev_t *mddev)
{
int i;
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i;
if (tmp->rdev
- && !tmp->rdev->faulty
- && !tmp->rdev->in_sync) {
+ && !test_bit(Faulty, &tmp->rdev->flags)
+ && !test_bit(In_sync, &tmp->rdev->flags)) {
conf->working_disks++;
mddev->degraded--;
- tmp->rdev->in_sync = 1;
+ set_bit(In_sync, &tmp->rdev->flags);
}
}
* very different from resync
*/
return 0;
+ if (!enough(conf))
+ return 0;
for (mirror=0; mirror < mddev->raid_disks; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) {
p->head_position = 0;
rdev->raid_disk = mirror;
found = 1;
- p->rdev = rdev;
+ rcu_assign_pointer(p->rdev, rdev);
break;
}
print_conf(conf);
rdev = p->rdev;
if (rdev) {
- if (rdev->in_sync ||
+ if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
err = -EBUSY;
goto abort;
for (i=0 ; i<conf->raid_disks; i++)
if (conf->mirrors[i].rdev &&
- !conf->mirrors[i].rdev->in_sync) {
+ !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
/* want to reconstruct this device */
r10bio_t *rb2 = r10_bio;
for (j=0; j<conf->copies;j++) {
int d = r10_bio->devs[j].devnum;
if (conf->mirrors[d].rdev &&
- conf->mirrors[d].rdev->in_sync) {
+ test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
/* This is where we read from */
bio = r10_bio->devs[0].bio;
bio->bi_next = biolist;
}
}
if (j == conf->copies) {
- BUG();
+ /* Cannot recover, so abort the recovery */
+ put_buf(r10_bio);
+ r10_bio = rb2;
+ if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
+ mdname(mddev));
+ break;
}
}
if (biolist == NULL) {
bio = r10_bio->devs[i].bio;
bio->bi_end_io = NULL;
if (conf->mirrors[d].rdev == NULL ||
- conf->mirrors[d].rdev->faulty)
+ test_bit(Faulty, &conf->mirrors[d].rdev->flags))
continue;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
atomic_inc(&r10_bio->remaining);
mddev->queue->max_sectors = (PAGE_SIZE>>9);
disk->head_position = 0;
- if (!rdev->faulty && rdev->in_sync)
+ if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags))
conf->working_disks++;
}
conf->raid_disks = mddev->raid_disks;
init_waitqueue_head(&conf->wait_idle);
init_waitqueue_head(&conf->wait_resume);
- if (!conf->working_disks) {
- printk(KERN_ERR "raid10: no operational mirrors for %s\n",
- mdname(mddev));
+ /* need to check that every block has at least one working mirror */
+ if (!enough(conf)) {
+ printk(KERN_ERR "raid10: not enough operational mirrors for %s\n",
+ mdname(mddev));
goto out_free_conf;
}