X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fraid5.c;h=0b66afef2d82009be4f54c34880df588a8a22b61;hb=b91cba52e9b7b3f1c0037908a192d93a869ca9e5;hp=810cf831edda3bc8bdfd703f3b134b5009f37d84;hpb=e89f89629b5de76e504d1be75c82c4a6b2419583;p=linux-2.6 diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 810cf831ed..0b66afef2d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1516,130 +1516,6 @@ static void copy_data(int frombio, struct bio *bio, } \ } while(0) - -static void compute_block(struct stripe_head *sh, int dd_idx) -{ - int i, count, disks = sh->disks; - void *ptr[MAX_XOR_BLOCKS], *dest, *p; - - pr_debug("compute_block, stripe %llu, idx %d\n", - (unsigned long long)sh->sector, dd_idx); - - dest = page_address(sh->dev[dd_idx].page); - memset(dest, 0, STRIPE_SIZE); - count = 0; - for (i = disks ; i--; ) { - if (i == dd_idx) - continue; - p = page_address(sh->dev[i].page); - if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) - ptr[count++] = p; - else - printk(KERN_ERR "compute_block() %d, stripe %llu, %d" - " not present\n", dd_idx, - (unsigned long long)sh->sector, i); - - check_xor(); - } - if (count) - xor_blocks(count, STRIPE_SIZE, dest, ptr); - set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); -} - -static void compute_parity5(struct stripe_head *sh, int method) -{ - raid5_conf_t *conf = sh->raid_conf; - int i, pd_idx = sh->pd_idx, disks = sh->disks, count; - void *ptr[MAX_XOR_BLOCKS], *dest; - struct bio *chosen; - - pr_debug("compute_parity5, stripe %llu, method %d\n", - (unsigned long long)sh->sector, method); - - count = 0; - dest = page_address(sh->dev[pd_idx].page); - switch(method) { - case READ_MODIFY_WRITE: - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags)); - for (i=disks ; i-- ;) { - if (i==pd_idx) - continue; - if (sh->dev[i].towrite && - test_bit(R5_UPTODATE, &sh->dev[i].flags)) { - ptr[count++] = page_address(sh->dev[i].page); - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - check_xor(); - } - } - break; - case RECONSTRUCT_WRITE: - memset(dest, 0, STRIPE_SIZE); - for (i= disks; i-- ;) - if (i!=pd_idx && sh->dev[i].towrite) { - chosen = sh->dev[i].towrite; - sh->dev[i].towrite = NULL; - - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) - wake_up(&conf->wait_for_overlap); - - BUG_ON(sh->dev[i].written); - sh->dev[i].written = chosen; - } - break; - case CHECK_PARITY: - break; - } - if (count) { - xor_blocks(count, STRIPE_SIZE, dest, ptr); - count = 0; - } - - for (i = disks; i--;) - if (sh->dev[i].written) { - sector_t sector = sh->dev[i].sector; - struct bio *wbi = sh->dev[i].written; - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { - copy_data(1, wbi, sh->dev[i].page, sector); - wbi = r5_next_bio(wbi, sector); - } - - set_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(R5_UPTODATE, &sh->dev[i].flags); - } - - switch(method) { - case RECONSTRUCT_WRITE: - case CHECK_PARITY: - for (i=disks; i--;) - if (i != pd_idx) { - ptr[count++] = page_address(sh->dev[i].page); - check_xor(); - } - break; - case READ_MODIFY_WRITE: - for (i = disks; i--;) - if (sh->dev[i].written) { - ptr[count++] = page_address(sh->dev[i].page); - check_xor(); - } - } - if (count) - xor_blocks(count, STRIPE_SIZE, dest, ptr); - - if (method != CHECK_PARITY) { - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); - } else - clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); -} - static void compute_parity6(struct stripe_head *sh, int method) { raid6_conf_t *conf = sh->raid_conf; @@ -2049,9 +1925,12 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, bi = bi2; } - /* fail any reads if this device is non-operational */ - if (!test_bit(R5_Insync, &sh->dev[i].flags) || - test_bit(R5_ReadError, &sh->dev[i].flags)) { + /* fail any reads if this device is non-operational and + * the data has not reached the cache yet. + */ + if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && + (!test_bit(R5_Insync, &sh->dev[i].flags) || + test_bit(R5_ReadError, &sh->dev[i].flags))) { bi = sh->dev[i].toread; sh->dev[i].toread = NULL; if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) @@ -2323,6 +2202,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, "%d for r-m-w\n", i); set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); + if (!test_and_set_bit( + STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; s->locked++; } else { set_bit(STRIPE_DELAYED, &sh->state); @@ -2346,6 +2228,9 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, "%d for Reconstruct\n", i); set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); + if (!test_and_set_bit( + STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; s->locked++; } else { set_bit(STRIPE_DELAYED, &sh->state); @@ -2542,6 +2427,9 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; + clear_bit(STRIPE_DEGRADED, &sh->state); s->locked++; set_bit(STRIPE_INSYNC, &sh->state); @@ -2650,6 +2538,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* We have read all the blocks in this stripe and now we need to * copy some of them into a target stripe for expand. */ + struct dma_async_tx_descriptor *tx = NULL; clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); for (i = 0; i < sh->disks; i++) if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) { @@ -2675,9 +2564,12 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, release_stripe(sh2); continue; } - memcpy(page_address(sh2->dev[dd_idx].page), - page_address(sh->dev[i].page), - STRIPE_SIZE); + + /* place all the copies on one channel */ + tx = async_memcpy(sh2->dev[dd_idx].page, + sh->dev[i].page, 0, 0, STRIPE_SIZE, + ASYNC_TX_DEP_ACK, tx, NULL, NULL); + set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); for (j = 0; j < conf->raid_disks; j++) @@ -2690,6 +2582,12 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, set_bit(STRIPE_HANDLE, &sh2->state); } release_stripe(sh2); + + /* done submitting copies, wait for them to complete */ + if (i + 1 >= sh->disks) { + async_tx_ack(tx); + dma_wait_for_async_tx(tx); + } } } @@ -2740,37 +2638,27 @@ static void handle_stripe5(struct stripe_head *sh) struct r5dev *dev = &sh->dev[i]; clear_bit(R5_Insync, &dev->flags); - pr_debug("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { - struct bio *rbi, *rbi2; - pr_debug("Return read for disc %d\n", i); - spin_lock_irq(&conf->device_lock); - rbi = dev->toread; - dev->toread = NULL; - if (test_and_clear_bit(R5_Overlap, &dev->flags)) - wake_up(&conf->wait_for_overlap); - spin_unlock_irq(&conf->device_lock); - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { - copy_data(0, rbi, dev->page, dev->sector); - rbi2 = r5_next_bio(rbi, dev->sector); - spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { - rbi->bi_next = return_bi; - return_bi = rbi; - } - spin_unlock_irq(&conf->device_lock); - rbi = rbi2; - } - } + pr_debug("check %d: state 0x%lx toread %p read %p write %p " + "written %p\n", i, dev->flags, dev->toread, dev->read, + dev->towrite, dev->written); + + /* maybe we can request a biofill operation + * + * new wantfill requests are only permitted while + * STRIPE_OP_BIOFILL is clear + */ + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && + !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) + set_bit(R5_Wantfill, &dev->flags); /* now count some things */ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; - if (dev->toread) + if (test_bit(R5_Wantfill, &dev->flags)) + s.to_fill++; + else if (dev->toread) s.to_read++; if (dev->towrite) { s.to_write++; @@ -2793,6 +2681,10 @@ static void handle_stripe5(struct stripe_head *sh) set_bit(R5_Insync, &dev->flags); } rcu_read_unlock(); + + if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) + sh->ops.count++; + pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d\n", s.locked, s.uptodate, s.to_read, s.to_write, @@ -2923,29 +2815,49 @@ static void handle_stripe5(struct stripe_head *sh) dev = &sh->dev[s.failed_num]; if (!test_bit(R5_ReWrite, &dev->flags)) { set_bit(R5_Wantwrite, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; set_bit(R5_ReWrite, &dev->flags); set_bit(R5_LOCKED, &dev->flags); s.locked++; } else { /* let's read it back */ set_bit(R5_Wantread, &dev->flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; set_bit(R5_LOCKED, &dev->flags); s.locked++; } } - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { - /* Need to write out all blocks after computing parity */ - sh->disks = conf->raid_disks; - sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); - compute_parity5(sh, RECONSTRUCT_WRITE); + /* Finish postxor operations initiated by the expansion + * process + */ + if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) && + !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) { + + clear_bit(STRIPE_EXPANDING, &sh->state); + + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); + for (i = conf->raid_disks; i--; ) { - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; set_bit(R5_Wantwrite, &sh->dev[i].flags); + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) + sh->ops.count++; } - clear_bit(STRIPE_EXPANDING, &sh->state); - } else if (s.expanded) { + } + + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { + /* Need to write out all blocks after computing parity */ + sh->disks = conf->raid_disks; + sh->pd_idx = stripe_to_pdidx(sh->sector, conf, + conf->raid_disks); + s.locked += handle_write_operations5(sh, 0, 1); + } else if (s.expanded && + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); wake_up(&conf->wait_for_overlap); @@ -2965,64 +2877,6 @@ static void handle_stripe5(struct stripe_head *sh) return_io(return_bi); - for (i=disks; i-- ;) { - int rw; - struct bio *bi; - mdk_rdev_t *rdev; - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) - rw = WRITE; - else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) - rw = READ; - else - continue; - - bi = &sh->dev[i].req; - - bi->bi_rw = rw; - if (rw == WRITE) - bi->bi_end_io = raid5_end_write_request; - else - bi->bi_end_io = raid5_end_read_request; - - rcu_read_lock(); - rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && test_bit(Faulty, &rdev->flags)) - rdev = NULL; - if (rdev) - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - - if (rdev) { - if (s.syncing || s.expanding || s.expanded) - md_sync_acct(rdev->bdev, STRIPE_SECTORS); - - bi->bi_bdev = rdev->bdev; - pr_debug("for %llu schedule op %ld on disc %d\n", - (unsigned long long)sh->sector, bi->bi_rw, i); - atomic_inc(&sh->count); - bi->bi_sector = sh->sector + rdev->data_offset; - bi->bi_flags = 1 << BIO_UPTODATE; - bi->bi_vcnt = 1; - bi->bi_max_vecs = 1; - bi->bi_idx = 0; - bi->bi_io_vec = &sh->dev[i].vec; - bi->bi_io_vec[0].bv_len = STRIPE_SIZE; - bi->bi_io_vec[0].bv_offset = 0; - bi->bi_size = STRIPE_SIZE; - bi->bi_next = NULL; - if (rw == WRITE && - test_bit(R5_ReWrite, &sh->dev[i].flags)) - atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); - generic_make_request(bi); - } else { - if (rw == WRITE) - set_bit(STRIPE_DEGRADED, &sh->state); - pr_debug("skip op %ld on disc %d for sector %llu\n", - bi->bi_rw, i, (unsigned long long)sh->sector); - clear_bit(R5_LOCKED, &sh->dev[i].flags); - set_bit(STRIPE_HANDLE, &sh->state); - } - } } static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)