]> err.no Git - linux-2.6/blobdiff - drivers/md/raid10.c
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / drivers / md / raid10.c
index 64bb4ddc6798232f1cda5cfbf87fa6e7c6d3792f..617012bc107a0205c597890d5167f03d86c45475 100644 (file)
@@ -59,10 +59,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
        int size = offsetof(struct r10bio_s, devs[conf->copies]);
 
        /* allocate a r10bio with room for raid_disks entries in the bios array */
-       r10_bio = kmalloc(size, gfp_flags);
-       if (r10_bio)
-               memset(r10_bio, 0, size);
-       else
+       r10_bio = kzalloc(size, gfp_flags);
+       if (!r10_bio)
                unplug_slaves(conf->mddev);
 
        return r10_bio;
@@ -134,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 out_free_pages:
        for ( ; i > 0 ; i--)
-               __free_page(bio->bi_io_vec[i-1].bv_page);
+               safe_put_page(bio->bi_io_vec[i-1].bv_page);
        while (j--)
                for (i = 0; i < RESYNC_PAGES ; i++)
-                       __free_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
+                       safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
        j = -1;
 out_free_bio:
        while ( ++j < nalloc )
@@ -157,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
                struct bio *bio = r10bio->devs[j].bio;
                if (bio) {
                        for (i = 0; i < RESYNC_PAGES; i++) {
-                               __free_page(bio->bi_io_vec[i].bv_page);
+                               safe_put_page(bio->bi_io_vec[i].bv_page);
                                bio->bi_io_vec[i].bv_page = NULL;
                        }
                        bio_put(bio);
@@ -172,13 +170,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
        for (i = 0; i < conf->copies; i++) {
                struct bio **bio = & r10_bio->devs[i].bio;
-               if (*bio)
+               if (*bio && *bio != IO_BLOCKED)
                        bio_put(*bio);
                *bio = NULL;
        }
 }
 
-static inline void free_r10bio(r10bio_t *r10_bio)
+static void free_r10bio(r10bio_t *r10_bio)
 {
        conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
@@ -192,7 +190,7 @@ static inline void free_r10bio(r10bio_t *r10_bio)
        mempool_free(r10_bio, conf->r10bio_pool);
 }
 
-static inline void put_buf(r10bio_t *r10_bio)
+static void put_buf(r10bio_t *r10_bio)
 {
        conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
@@ -500,6 +498,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
                disk = r10_bio->devs[slot].devnum;
 
                while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+                      r10_bio->devs[slot].bio == IO_BLOCKED ||
                       !test_bit(In_sync, &rdev->flags)) {
                        slot++;
                        if (slot == conf->copies) {
@@ -517,6 +516,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
        slot = 0;
        disk = r10_bio->devs[slot].devnum;
        while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+              r10_bio->devs[slot].bio == IO_BLOCKED ||
               !test_bit(In_sync, &rdev->flags)) {
                slot ++;
                if (slot == conf->copies) {
@@ -537,6 +537,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 
 
                if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+                   r10_bio->devs[nslot].bio == IO_BLOCKED ||
                    !test_bit(In_sync, &rdev->flags))
                        continue;
 
@@ -564,6 +565,8 @@ rb_out:
 
        if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
                atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+       else
+               disk = -1;
        rcu_read_unlock();
 
        return disk;
@@ -711,7 +714,7 @@ static void allow_barrier(conf_t *conf)
 static void freeze_array(conf_t *conf)
 {
        /* stop syncio and normal IO and wait for everything to
-        * go quite.
+        * go quiet.
         * We increment barrier and nr_waiting, and then
         * wait until barrier+nr_pending match nr_queued+2
         */
@@ -1104,7 +1107,6 @@ abort:
 
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
        conf_t *conf = mddev_to_conf(r10_bio->mddev);
        int i,d;
@@ -1115,13 +1117,19 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
        for (i=0; i<conf->copies; i++)
                if (r10_bio->devs[i].bio == bio)
                        break;
-       if (i == conf->copies)
-               BUG();
+       BUG_ON(i == conf->copies);
        update_head_pos(i, r10_bio);
        d = r10_bio->devs[i].devnum;
-       if (!uptodate)
-               md_error(r10_bio->mddev,
-                        conf->mirrors[d].rdev);
+
+       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+               set_bit(R10BIO_Uptodate, &r10_bio->state);
+       else {
+               atomic_add(r10_bio->sectors,
+                          &conf->mirrors[d].rdev->corrected_errors);
+               if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+                       md_error(r10_bio->mddev,
+                                conf->mirrors[d].rdev);
+       }
 
        /* for reconstruct, we always reschedule after a read.
         * for resync, only after all reads
@@ -1209,25 +1217,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
        fbio = r10_bio->devs[i].bio;
 
        /* now find blocks with errors */
-       for (i=first+1 ; i < conf->copies ; i++) {
-               int vcnt, j, d;
+       for (i=0 ; i < conf->copies ; i++) {
+               int  j, d;
+               int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
 
-               if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
-                       continue;
-               /* We know that the bi_io_vec layout is the same for
-                * both 'first' and 'i', so we just compare them.
-                * All vec entries are PAGE_SIZE;
-                */
                tbio = r10_bio->devs[i].bio;
-               vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
-               for (j = 0; j < vcnt; j++)
-                       if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
-                                  page_address(tbio->bi_io_vec[j].bv_page),
-                                  PAGE_SIZE))
-                               break;
-               if (j == vcnt)
+
+               if (tbio->bi_end_io != end_sync_read)
                        continue;
-               mddev->resync_mismatches += r10_bio->sectors;
+               if (i == first)
+                       continue;
+               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+                       /* We know that the bi_io_vec layout is the same for
+                        * both 'first' and 'i', so we just compare them.
+                        * All vec entries are PAGE_SIZE;
+                        */
+                       for (j = 0; j < vcnt; j++)
+                               if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+                                          page_address(tbio->bi_io_vec[j].bv_page),
+                                          PAGE_SIZE))
+                                       break;
+                       if (j == vcnt)
+                               continue;
+                       mddev->resync_mismatches += r10_bio->sectors;
+               }
                if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
                        /* Don't fix anything. */
                        continue;
@@ -1308,7 +1321,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 
        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
        md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-       generic_make_request(wbio);
+       if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+               generic_make_request(wbio);
+       else
+               bio_endio(wbio, wbio->bi_size, -EIO);
 }
 
 
@@ -1410,6 +1426,7 @@ static void raid10d(mddev_t *mddev)
                                } while (!success && sl != r10_bio->read_slot);
 
                                if (success) {
+                                       int start = sl;
                                        /* write it back and re-read */
                                        while (sl != r10_bio->read_slot) {
                                                int d;
@@ -1418,19 +1435,33 @@ static void raid10d(mddev_t *mddev)
                                                sl--;
                                                d = r10_bio->devs[sl].devnum;
                                                rdev = conf->mirrors[d].rdev;
+                                               atomic_add(s, &rdev->corrected_errors);
                                                if (rdev &&
                                                    test_bit(In_sync, &rdev->flags)) {
                                                        if (sync_page_io(rdev->bdev,
                                                                         r10_bio->devs[sl].addr +
                                                                         sect + rdev->data_offset,
-                                                                        s<<9, conf->tmppage, WRITE) == 0 ||
-                                                           sync_page_io(rdev->bdev,
+                                                                        s<<9, conf->tmppage, WRITE) == 0)
+                                                               /* Well, this device is dead */
+                                                               md_error(mddev, rdev);
+                                               }
+                                       }
+                                       sl = start;
+                                       while (sl != r10_bio->read_slot) {
+                                               int d;
+                                               if (sl==0)
+                                                       sl = conf->copies;
+                                               sl--;
+                                               d = r10_bio->devs[sl].devnum;
+                                               rdev = conf->mirrors[d].rdev;
+                                               if (rdev &&
+                                                   test_bit(In_sync, &rdev->flags)) {
+                                                       if (sync_page_io(rdev->bdev,
                                                                         r10_bio->devs[sl].addr +
                                                                         sect + rdev->data_offset,
-                                                                        s<<9, conf->tmppage, READ) == 0) {
+                                                                        s<<9, conf->tmppage, READ) == 0)
                                                                /* Well, this device is dead */
                                                                md_error(mddev, rdev);
-                                                       }
                                                }
                                        }
                                } else {
@@ -1445,7 +1476,8 @@ static void raid10d(mddev_t *mddev)
                        unfreeze_array(conf);
 
                        bio = r10_bio->devs[r10_bio->read_slot].bio;
-                       r10_bio->devs[r10_bio->read_slot].bio = NULL;
+                       r10_bio->devs[r10_bio->read_slot].bio =
+                               mddev->ro ? IO_BLOCKED : NULL;
                        bio_put(bio);
                        mirror = read_balance(conf, r10_bio);
                        if (mirror == -1) {
@@ -1485,8 +1517,7 @@ static int init_resync(conf_t *conf)
        int buffs;
 
        buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-       if (conf->r10buf_pool)
-               BUG();
+       BUG_ON(conf->r10buf_pool);
        conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
        if (!conf->r10buf_pool)
                return -ENOMEM;
@@ -1660,8 +1691,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                                for (j=0; j<conf->copies;j++) {
                                        int d = r10_bio->devs[j].devnum;
                                        if (conf->mirrors[d].rdev == NULL ||
-                                           test_bit(Faulty, &conf->mirrors[d].rdev->flags))
+                                           test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
                                                still_degraded = 1;
+                                               break;
+                                       }
                                }
                                must_sync = bitmap_start_sync(mddev->bitmap, sect,
                                                              &sync_blocks, still_degraded);
@@ -1869,11 +1902,11 @@ static int run(mddev_t *mddev)
        int nc, fc;
        sector_t stride, size;
 
-       if (mddev->level != 10) {
-               printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",
-                      mdname(mddev), mddev->level);
-               goto out;
+       if (mddev->chunk_size == 0) {
+               printk(KERN_ERR "md/raid10: non-zero chunk size required.\n");
+               return -EINVAL;
        }
+
        nc = mddev->layout & 255;
        fc = (mddev->layout >> 8) & 255;
        if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
@@ -2001,7 +2034,7 @@ static int run(mddev_t *mddev)
         * maybe...
         */
        {
-               int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;
+               int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE;
                stripe /= conf->near_copies;
                if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
                        mddev->queue->backing_dev_info.ra_pages = 2* stripe;
@@ -2014,7 +2047,7 @@ static int run(mddev_t *mddev)
 out_free_conf:
        if (conf->r10bio_pool)
                mempool_destroy(conf->r10bio_pool);
-       put_page(conf->tmppage);
+       safe_put_page(conf->tmppage);
        kfree(conf->mirrors);
        kfree(conf);
        mddev->private = NULL;
@@ -2058,9 +2091,10 @@ static void raid10_quiesce(mddev_t *mddev, int state)
        }
 }
 
-static mdk_personality_t raid10_personality =
+static struct mdk_personality raid10_personality =
 {
        .name           = "raid10",
+       .level          = 10,
        .owner          = THIS_MODULE,
        .make_request   = make_request,
        .run            = run,
@@ -2076,15 +2110,17 @@ static mdk_personality_t raid10_personality =
 
 static int __init raid_init(void)
 {
-       return register_md_personality(RAID10, &raid10_personality);
+       return register_md_personality(&raid10_personality);
 }
 
 static void raid_exit(void)
 {
-       unregister_md_personality(RAID10);
+       unregister_md_personality(&raid10_personality);
 }
 
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-9"); /* RAID10 */
+MODULE_ALIAS("md-raid10");
+MODULE_ALIAS("md-level-10");