#define __inline__
#endif
+#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
+
#if !RAID6_USE_EMPTY_ZERO_PAGE
/* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
return_bi = bi->bi_next;
bi->bi_next = NULL;
bi->bi_size = 0;
- bi->bi_end_io(bi,
- test_bit(BIO_UPTODATE, &bi->bi_flags)
- ? 0 : -EIO);
+ bio_endio(bi, 0);
bi = return_bi;
}
}
static void ops_complete_check(void *stripe_head_ref)
{
struct stripe_head *sh = stripe_head_ref;
- int pd_idx = sh->pd_idx;
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);
- if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
- sh->ops.zero_sum_result == 0)
- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-
set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
&sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
- if (tx)
- set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
- else
- clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
-
atomic_inc(&sh->count);
tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
ops_complete_check, sh);
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
rdev = conf->disks[i].rdev;
- printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
- mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdevname(rdev->bdev, b));
+ printk_rl(KERN_INFO "raid5:%s: read error corrected"
+ " (%lu sectors at %llu on %s)\n",
+ mdname(conf->mddev), STRIPE_SECTORS,
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdevname(rdev->bdev, b));
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (conf->mddev->degraded)
- printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error not correctable "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error NOT corrected!! "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
/*
* if recovery was running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
printk (KERN_ALERT
* have quiesced.
*/
if ((s->uptodate == disks - 1) &&
+ (s->failed && disk_idx == s->failed_num) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
set_bit(R5_Wantcompute, &dev->flags);
/* we would like to get this block, possibly
* by computing it, but we might not be able to
*/
- if (s->uptodate == disks-1) {
+ if ((s->uptodate == disks - 1) &&
+ (s->failed && (i == r6s->failed_num[0] ||
+ i == r6s->failed_num[1]))) {
pr_debug("Computing stripe %llu block %d\n",
(unsigned long long)sh->sector, i);
compute_block_1(sh, i, 0);
struct r5dev *dev;
unsigned long pending = 0;
mdk_rdev_t *blocked_rdev = NULL;
+ int prexor;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
/* leave prexor set until postxor is done, allows us to distinguish
* a rmw from a rcw during biodrain
*/
+ prexor = 0;
if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+ prexor = 1;
clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
if (!test_and_set_bit(
STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
+ if (prexor)
+ continue;
if (!test_bit(R5_Insync, &dev->flags) ||
(i == sh->pd_idx && s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
}
conf->raid_disks);
s.locked += handle_write_operations5(sh, 1, 1);
} else if (s.expanded &&
+ s.locked == 0 &&
!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
if ( rw == WRITE )
md_write_end(mddev);
- bi->bi_end_io(bi,
- test_bit(BIO_UPTODATE, &bi->bi_flags)
- ? 0 : -EIO);
+ bio_endio(bi, 0);
}
return 0;
}
spin_lock_irq(&conf->device_lock);
remaining = --raid_bio->bi_phys_segments;
spin_unlock_irq(&conf->device_lock);
- if (remaining == 0) {
-
- raid_bio->bi_end_io(raid_bio,
- test_bit(BIO_UPTODATE, &raid_bio->bi_flags)
- ? 0 : -EIO);
- }
+ if (remaining == 0)
+ bio_endio(raid_bio, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
return handled;
" disk %d\n", bdevname(rdev->bdev,b),
raid_disk);
working_disks++;
- }
+ } else
+ /* Cannot rely on bitmap to complete recovery */
+ conf->fullsync = 1;
}
/*
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices if recovery
+ * isn't possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded <= conf->max_degraded) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
raid5_conf_t *conf = mddev->private;
- int found = 0;
+ int err = -EEXIST;
int disk;
struct disk_info *p;
+ int first = 0;
+ int last = conf->raid_disks - 1;
if (mddev->degraded > conf->max_degraded)
/* no point adding a device */
- return 0;
+ return -EINVAL;
+
+ if (rdev->raid_disk >= 0)
+ first = last = rdev->raid_disk;
/*
* find the disk ... but prefer rdev->saved_raid_disk
* if possible.
*/
if (rdev->saved_raid_disk >= 0 &&
+ rdev->saved_raid_disk >= first &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
disk = rdev->saved_raid_disk;
else
- disk = 0;
- for ( ; disk < conf->raid_disks; disk++)
+ disk = first;
+ for ( ; disk <= last ; disk++)
if ((p=conf->disks + disk)->rdev == NULL) {
clear_bit(In_sync, &rdev->flags);
rdev->raid_disk = disk;
- found = 1;
+ err = 0;
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev);
break;
}
print_raid5_conf(conf);
- return found;
+ return err;
}
static int raid5_resize(mddev_t *mddev, sector_t sectors)
rdev_for_each(rdev, rtmp, mddev)
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) {
- if (raid5_add_disk(mddev, rdev)) {
+ if (raid5_add_disk(mddev, rdev) == 0) {
char nm[20];
set_bit(In_sync, &rdev->flags);
added_devices++;