From: NeilBrown Date: Tue, 3 Oct 2006 08:15:54 +0000 (-0700) Subject: [PATCH] md: define ->congested_fn for raid1, raid10, and multipath X-Git-Tag: v2.6.19-rc1~241 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0d12922823408b26f83b15cae4a4feff4bd22f28;p=linux-2.6 [PATCH] md: define ->congested_fn for raid1, raid10, and multipath raid1, raid10 and multipath don't report their 'congested' status through bdi_*_congested, but should. This patch adds the appropriate functions which just check the 'congested' status of all active members (with appropriate locking). raid1 read_balance should be modified to prefer devices where bdi_read_congested returns false. Then we could use the '&' branch rather than the '|' branch. However that would need some benchmarking first to make sure it is actually a good idea. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index e4f168d063..171ff41b52 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -228,6 +228,28 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk, rcu_read_unlock(); return ret; } +static int multipath_congested(void *data, int bits) +{ + mddev_t *mddev = data; + multipath_conf_t *conf = mddev_to_conf(mddev); + int i, ret = 0; + + rcu_read_lock(); + for (i = 0; i < mddev->raid_disks ; i++) { + mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags)) { + request_queue_t *q = bdev_get_queue(rdev->bdev); + + ret |= bdi_congested(&q->backing_dev_info, bits); + /* Just like multipath_map, we just check the + * first available device + */ + break; + } + } + rcu_read_unlock(); + return ret; +} /* * Careful, this can execute in IRQ contexts as well! 
@@ -509,6 +531,8 @@ static int multipath_run (mddev_t *mddev) mddev->queue->unplug_fn = multipath_unplug; mddev->queue->issue_flush_fn = multipath_issue_flush; + mddev->queue->backing_dev_info.congested_fn = multipath_congested; + mddev->queue->backing_dev_info.congested_data = mddev; return 0; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 99c4e031c7..dc9d2def02 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -601,6 +601,32 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } +static int raid1_congested(void *data, int bits) +{ + mddev_t *mddev = data; + conf_t *conf = mddev_to_conf(mddev); + int i, ret = 0; + + rcu_read_lock(); + for (i = 0; i < mddev->raid_disks; i++) { + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags)) { + request_queue_t *q = bdev_get_queue(rdev->bdev); + + /* Note the '|| 1' - when read_balance prefers + * non-congested targets, it can be removed + */ + if ((bits & (1<<BDI_write_congested)) || 1) + ret |= bdi_congested(&q->backing_dev_info, bits); + else + ret &= bdi_congested(&q->backing_dev_info, bits); + } + } + rcu_read_unlock(); + return ret; +} + + /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. 
@@ -1965,6 +1991,8 @@ static int run(mddev_t *mddev) mddev->queue->unplug_fn = raid1_unplug; mddev->queue->issue_flush_fn = raid1_issue_flush; + mddev->queue->backing_dev_info.congested_fn = raid1_congested; + mddev->queue->backing_dev_info.congested_data = mddev; return 0; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 64f8016ab7..1250f0eab4 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -648,6 +648,26 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk, return ret; } +static int raid10_congested(void *data, int bits) +{ + mddev_t *mddev = data; + conf_t *conf = mddev_to_conf(mddev); + int i, ret = 0; + + rcu_read_lock(); + for (i = 0; i < mddev->raid_disks && ret == 0; i++) { + mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + if (rdev && !test_bit(Faulty, &rdev->flags)) { + request_queue_t *q = bdev_get_queue(rdev->bdev); + + ret |= bdi_congested(&q->backing_dev_info, bits); + } + } + rcu_read_unlock(); + return ret; +} + + /* Barriers.... * Sometimes we need to suspend IO while we do something else, * either some resync/recovery, or reconfigure the array. @@ -2094,6 +2114,8 @@ static int run(mddev_t *mddev) mddev->queue->unplug_fn = raid10_unplug; mddev->queue->issue_flush_fn = raid10_issue_flush; + mddev->queue->backing_dev_info.congested_fn = raid10_congested; + mddev->queue->backing_dev_info.congested_data = mddev; /* Calculate max read-ahead size. * We need to readahead at least twice a whole stripe....