current_distance = abs(r10_bio->devs[slot].addr -
conf->mirrors[disk].head_position);
- /* Find the disk whose head is closest */
+ /* Find the disk whose head is closest,
+ * or - for far > 1 - find the closest to partition beginning */
for (nslot = slot; nslot < conf->copies; nslot++) {
int ndisk = r10_bio->devs[nslot].devnum;
slot = nslot;
break;
}
- new_distance = abs(r10_bio->devs[nslot].addr -
- conf->mirrors[ndisk].head_position);
+
+ /* for far > 1 always use the lowest address */
+ if (conf->far_copies > 1)
+ new_distance = r10_bio->devs[nslot].addr;
+ else
+ new_distance = abs(r10_bio->devs[nslot].addr -
+ conf->mirrors[ndisk].head_position);
if (new_distance < current_distance) {
current_distance = new_distance;
disk = ndisk;
/* stop syncio and normal IO and wait for everything to
* go quiet.
* We increment barrier and nr_waiting, and then
- * wait until barrier+nr_pending match nr_queued+2
+ * wait until nr_pending match nr_queued+1
+ * This is called in the context of one normal IO request
+ * that has failed. Thus any sync request that might be pending
+ * will be blocked by nr_pending, and we need to wait for
+ * pending IO requests to complete or be queued for re-try.
+ * Thus the number queued (nr_queued) plus this request (1)
+ * must match the number of pending IOs (nr_pending) before
+ * we continue.
*/
spin_lock_irq(&conf->resync_lock);
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq(conf->wait_barrier,
- conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->nr_pending == conf->nr_queued+1,
conf->resync_lock,
({ flush_pending_writes(conf);
raid10_unplug(conf->mddev->queue); }));
const int do_sync = bio_sync(bio);
struct bio_list bl;
unsigned long flags;
+ mdk_rdev_t *blocked_rdev;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, -EOPNOTSUPP);
/*
* WRITE:
*/
- /* first select target devices under spinlock and
+ /* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
*/
raid10_find_phys(conf, r10_bio);
+ retry_write:
+ blocked_rdev = 0;
rcu_read_lock();
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
- if (rdev &&
- !test_bit(Faulty, &rdev->flags)) {
+ if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+ atomic_inc(&rdev->nr_pending);
+ blocked_rdev = rdev;
+ break;
+ }
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else {
}
rcu_read_unlock();
+ if (unlikely(blocked_rdev)) {
+ /* Have to wait for this device to get unblocked, then retry */
+ int j;
+ int d;
+
+ for (j = 0; j < i; j++)
+ if (r10_bio->devs[j].bio) {
+ d = r10_bio->devs[j].devnum;
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+ }
+ allow_barrier(conf);
+ md_wait_for_blocked_rdev(blocked_rdev, mddev);
+ wait_barrier(conf);
+ goto retry_write;
+ }
+
atomic_set(&r10_bio->remaining, 0);
bio_list_init(&bl);
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
- " Operation continuing on %d devices\n",
+ printk(KERN_ALERT "raid10: Disk failure on %s, disabling device.\n"
+ "raid10: Operation continuing on %d devices.\n",
bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
if (j == conf->copies) {
/* Cannot recover, so abort the recovery */
put_buf(r10_bio);
+ if (rb2)
+ atomic_dec(&rb2->remaining);
r10_bio = rb2;
if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",