X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fraid5.c;h=269b7771a30b79e2d1c01b4cca0fd43593ec27c1;hb=f679623f50545bc0577caf2d0f8675b61162f059;hp=e14f4578072006e3b464668d3c8500187df76453;hpb=12e36b2f41b6cbc67386fcb9c59c32a3e2033905;p=linux-2.6 diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e14f457807..269b7771a3 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -348,7 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf) static int grow_stripes(raid5_conf_t *conf, int num) { - kmem_cache_t *sc; + struct kmem_cache *sc; int devs = conf->raid_disks; sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); @@ -397,7 +397,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) LIST_HEAD(newstripes); struct disk_info *ndisks; int err = 0; - kmem_cache_t *sc; + struct kmem_cache *sc; int i; if (newsize <= conf->pool_size) @@ -2611,6 +2611,106 @@ static int raid5_congested(void *data, int bits) return 0; } +/* We want read requests to align with chunks where possible, + * but write requests don't need to. + */ +static int raid5_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) +{ + mddev_t *mddev = q->queuedata; + sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + int max; + unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int bio_sectors = bio->bi_size >> 9; + + if (bio_data_dir(bio)) + return biovec->bv_len; /* always allow writes to be mergeable */ + + max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; + if (max < 0) max = 0; + if (max <= biovec->bv_len && bio_sectors == 0) + return biovec->bv_len; + else + return max; +} + + +static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) +{ + sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + unsigned int chunk_sectors = mddev->chunk_size >> 9; + unsigned int bio_sectors = bio->bi_size >> 9; + + return chunk_sectors >= + ((sector & (chunk_sectors - 1)) + bio_sectors); +} + +/* + * The "raid5_align_endio" should check if the read succeeded and if it + * did, call bio_endio on the original bio (having bio_put the new bio + * first). + * If the read failed.. + */ +int raid5_align_endio(struct bio *bi, unsigned int bytes , int error) +{ + struct bio* raid_bi = bi->bi_private; + if (bi->bi_size) + return 1; + bio_put(bi); + bio_endio(raid_bi, bytes, error); + return 0; +} + +static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio) +{ + mddev_t *mddev = q->queuedata; + raid5_conf_t *conf = mddev_to_conf(mddev); + const unsigned int raid_disks = conf->raid_disks; + const unsigned int data_disks = raid_disks - 1; + unsigned int dd_idx, pd_idx; + struct bio* align_bi; + mdk_rdev_t *rdev; + + if (!in_chunk_boundary(mddev, raid_bio)) { + printk("chunk_aligned_read : non aligned\n"); + return 0; + } + /* + * use bio_clone to make a copy of the bio + */ + align_bi = bio_clone(raid_bio, GFP_NOIO); + if (!align_bi) + return 0; + /* + * set bi_end_io to a new function, and set bi_private to the + * original bio. + */ + align_bi->bi_end_io = raid5_align_endio; + align_bi->bi_private = raid_bio; + /* + * compute position + */ + align_bi->bi_sector = raid5_compute_sector(raid_bio->bi_sector, + raid_disks, + data_disks, + &dd_idx, + &pd_idx, + conf); + + rcu_read_lock(); + rdev = rcu_dereference(conf->disks[dd_idx].rdev); + if (rdev && test_bit(In_sync, &rdev->flags)) { + align_bi->bi_bdev = rdev->bdev; + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + generic_make_request(align_bi); + return 1; + } else { + rcu_read_unlock(); + return 0; + } +} + + static int make_request(request_queue_t *q, struct bio * bi) { mddev_t *mddev = q->queuedata; @@ -3320,6 +3420,8 @@ static int run(mddev_t *mddev) mddev->array_size = mddev->size * (conf->previous_raid_disks - conf->max_degraded); + blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); + return 0; abort: if (conf) { @@ -3659,7 +3761,7 @@ static void end_reshape(raid5_conf_t *conf) bdev = bdget_disk(conf->mddev->gendisk, 0); if (bdev) { mutex_lock(&bdev->bd_inode->i_mutex); - i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); + i_size_write(bdev->bd_inode, (loff_t)conf->mddev->array_size << 10); mutex_unlock(&bdev->bd_inode->i_mutex); bdput(bdev); }