]> err.no Git - linux-2.6/blobdiff - drivers/md/raid5.c
[PATCH] md: handle bypassing the read cache (assuming nothing fails)
[linux-2.6] / drivers / md / raid5.c
index e14f4578072006e3b464668d3c8500187df76453..269b7771a30b79e2d1c01b4cca0fd43593ec27c1 100644 (file)
@@ -348,7 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf)
 
 static int grow_stripes(raid5_conf_t *conf, int num)
 {
-       kmem_cache_t *sc;
+       struct kmem_cache *sc;
        int devs = conf->raid_disks;
 
        sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
@@ -397,7 +397,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        LIST_HEAD(newstripes);
        struct disk_info *ndisks;
        int err = 0;
-       kmem_cache_t *sc;
+       struct kmem_cache *sc;
        int i;
 
        if (newsize <= conf->pool_size)
@@ -2611,6 +2611,106 @@ static int raid5_congested(void *data, int bits)
        return 0;
 }
 
+/* We want read requests to align with chunks where possible,
+ * but write requests don't need to.
+ */
+static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec)
+{
+       mddev_t *mddev = q->queuedata;
+       sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+       int max;
+       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int bio_sectors = bio->bi_size >> 9;
+
+       if (bio_data_dir(bio))
+               return biovec->bv_len; /* always allow writes to be mergeable */
+
+       max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
+       if (max < 0) max = 0;
+       if (max <= biovec->bv_len && bio_sectors == 0)
+               return biovec->bv_len;
+       else
+               return max;
+}
+
+
+static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
+{
+       sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+       unsigned int chunk_sectors = mddev->chunk_size >> 9;
+       unsigned int bio_sectors = bio->bi_size >> 9;
+
+       return  chunk_sectors >=
+               ((sector & (chunk_sectors - 1)) + bio_sectors);
+}
+
+/*
+ *  The "raid5_align_endio" should check if the read succeeded and if it
+ *  did, call bio_endio on the original bio (having bio_put the new bio
+ *  first).
+ *  If the read failed..
+ */
+int raid5_align_endio(struct bio *bi, unsigned int bytes , int error)
+{
+       struct bio* raid_bi  = bi->bi_private;
+       if (bi->bi_size)
+               return 1;
+       bio_put(bi);
+       bio_endio(raid_bi, bytes, error);
+       return 0;
+}
+
+static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
+{
+       mddev_t *mddev = q->queuedata;
+       raid5_conf_t *conf = mddev_to_conf(mddev);
+       const unsigned int raid_disks = conf->raid_disks;
+       const unsigned int data_disks = raid_disks - 1;
+       unsigned int dd_idx, pd_idx;
+       struct bio* align_bi;
+       mdk_rdev_t *rdev;
+
+       if (!in_chunk_boundary(mddev, raid_bio)) {
+               printk("chunk_aligned_read : non aligned\n");
+               return 0;
+       }
+       /*
+        * use bio_clone to make a copy of the bio
+        */
+       align_bi = bio_clone(raid_bio, GFP_NOIO);
+       if (!align_bi)
+               return 0;
+       /*
+        *   set bi_end_io to a new function, and set bi_private to the
+        *     original bio.
+        */
+       align_bi->bi_end_io  = raid5_align_endio;
+       align_bi->bi_private = raid_bio;
+       /*
+        *      compute position
+        */
+       align_bi->bi_sector =  raid5_compute_sector(raid_bio->bi_sector,
+                                       raid_disks,
+                                       data_disks,
+                                       &dd_idx,
+                                       &pd_idx,
+                                       conf);
+
+       rcu_read_lock();
+       rdev = rcu_dereference(conf->disks[dd_idx].rdev);
+       if (rdev && test_bit(In_sync, &rdev->flags)) {
+               align_bi->bi_bdev =  rdev->bdev;
+               atomic_inc(&rdev->nr_pending);
+               rcu_read_unlock();
+               generic_make_request(align_bi);
+               return 1;
+       } else {
+               rcu_read_unlock();
+               return 0;
+       }
+}
+
+
 static int make_request(request_queue_t *q, struct bio * bi)
 {
        mddev_t *mddev = q->queuedata;
@@ -3320,6 +3420,8 @@ static int run(mddev_t *mddev)
        mddev->array_size =  mddev->size * (conf->previous_raid_disks -
                                            conf->max_degraded);
 
+       blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
+
        return 0;
 abort:
        if (conf) {
@@ -3659,7 +3761,7 @@ static void end_reshape(raid5_conf_t *conf)
                bdev = bdget_disk(conf->mddev->gendisk, 0);
                if (bdev) {
                        mutex_lock(&bdev->bd_inode->i_mutex);
-                       i_size_write(bdev->bd_inode, conf->mddev->array_size << 10);
+                       i_size_write(bdev->bd_inode, (loff_t)conf->mddev->array_size << 10);
                        mutex_unlock(&bdev->bd_inode->i_mutex);
                        bdput(bdev);
                }