From: Linus Torvalds Date: Fri, 15 Aug 2008 16:30:24 +0000 (-0700) Subject: Merge branch 'for-linus' of git://neil.brown.name/md X-Git-Tag: v2.6.27-rc4~66 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3141eb6c50f1dafa99874e702d8b444034e2bb10;hp=7db9cbb3748c46b80e5c99ffa91945b8dd4ed5e5;p=linux-2.6 Merge branch 'for-linus' of git://neil.brown.name/md * 'for-linus' of git://neil.brown.name/md: md: cancel check/repair requests when recovery is needed Allow raid10 resync to happening in larger chunks. Allow faulty devices to be removed from a readonly array. Don't let a blocked_rdev interfere with read request in raid5/6 Fail safely when trying to grow an array with a write-intent bitmap. Restore force switch of md array to readonly at reboot time. Make writes to md/safe_mode_delay immediately effective. --- diff --git a/drivers/md/md.c b/drivers/md/md.c index c7aae66c6f..8cfadc5bd2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2393,6 +2393,8 @@ static void analyze_sbs(mddev_t * mddev) } +static void md_safemode_timeout(unsigned long data); + static ssize_t safe_delay_show(mddev_t *mddev, char *page) { @@ -2432,9 +2434,12 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len) if (msec == 0) mddev->safemode_delay = 0; else { + unsigned long old_delay = mddev->safemode_delay; mddev->safemode_delay = (msec*HZ)/1000; if (mddev->safemode_delay == 0) mddev->safemode_delay = 1; + if (mddev->safemode_delay < old_delay) + md_safemode_timeout((unsigned long)mddev); } return len; } @@ -4634,6 +4639,11 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) */ if (mddev->sync_thread) return -EBUSY; + if (mddev->bitmap) + /* Sorry, cannot grow a bitmap yet, just remove it, + * grow, and re-add. + */ + return -EBUSY; rdev_for_each(rdev, tmp, mddev) { sector_t avail; avail = rdev->size * 2; @@ -5993,7 +6003,7 @@ static int remove_and_add_spares(mddev_t *mddev) } } - if (mddev->degraded) { + if (mddev->degraded && ! mddev->ro) { rdev_for_each(rdev, rtmp, mddev) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && @@ -6067,6 +6077,8 @@ void md_check_recovery(mddev_t *mddev) flush_signals(current); } + if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) + return; if ( ! ( (mddev->flags && !mddev->external) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || @@ -6080,6 +6092,15 @@ void md_check_recovery(mddev_t *mddev) if (mddev_trylock(mddev)) { int spares = 0; + if (mddev->ro) { + /* Only thing we do on a ro array is remove + * failed devices. + */ + remove_and_add_spares(mddev); + clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + goto unlock; + } + if (!mddev->external) { int did_change = 0; spin_lock_irq(&mddev->write_lock); @@ -6117,7 +6138,8 @@ void md_check_recovery(mddev_t *mddev) /* resync has finished, collect result */ md_unregister_thread(mddev->sync_thread); mddev->sync_thread = NULL; - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && + !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { /* success...*/ /* activate any spares */ if (mddev->pers->spare_active(mddev)) @@ -6169,6 +6191,7 @@ void md_check_recovery(mddev_t *mddev) } else if ((spares = remove_and_add_spares(mddev))) { clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); } else if (mddev->recovery_cp < MaxSector) { set_bit(MD_RECOVERY_SYNC, &mddev->recovery); @@ -6232,7 +6255,11 @@ static int md_notify_reboot(struct notifier_block *this, for_each_mddev(mddev, tmp) if (mddev_trylock(mddev)) { - do_md_stop (mddev, 1, 0); + /* Force a switch to readonly even array + * appears to still be in use. Hence + * the '100'. + */ + do_md_stop (mddev, 1, 100); mddev_unlock(mddev); } /* diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d41bebb6da..e34cd0e624 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -76,11 +76,13 @@ static void r10bio_pool_free(void *r10_bio, void *data) kfree(r10_bio); } +/* Maximum size of each resync request */ #define RESYNC_BLOCK_SIZE (64*1024) -//#define RESYNC_BLOCK_SIZE PAGE_SIZE -#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) -#define RESYNC_WINDOW (2048*1024) +/* amount of memory to reserve for resync requests */ +#define RESYNC_WINDOW (1024*1024) +/* maximum number of concurrent requests, memory permitting */ +#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE) /* * When performing a resync, we need to read and compare, so @@ -690,7 +692,6 @@ static int flush_pending_writes(conf_t *conf) * there is no normal IO happeing. It must arrange to call * lower_barrier when the particular background IO completes. */ -#define RESYNC_DEPTH 32 static void raise_barrier(conf_t *conf, int force) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 40e9396756..224de022e7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2568,10 +2568,10 @@ static bool handle_stripe5(struct stripe_head *sh) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + if (blocked_rdev == NULL && + rdev && unlikely(test_bit(Blocked, &rdev->flags))) { blocked_rdev = rdev; atomic_inc(&rdev->nr_pending); - break; } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ @@ -2588,8 +2588,14 @@ static bool handle_stripe5(struct stripe_head *sh) rcu_read_unlock(); if (unlikely(blocked_rdev)) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + if (s.syncing || s.expanding || s.expanded || + s.to_write || s.written) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } + /* There is nothing for the blocked_rdev to block */ + rdev_dec_pending(blocked_rdev, conf->mddev); + blocked_rdev = NULL; } if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { @@ -2832,10 +2838,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) if (dev->written) s.written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { + if (blocked_rdev == NULL && + rdev && unlikely(test_bit(Blocked, &rdev->flags))) { blocked_rdev = rdev; atomic_inc(&rdev->nr_pending); - break; } if (!rdev || !test_bit(In_sync, &rdev->flags)) { /* The ReadError flag will just be confusing now */ @@ -2853,9 +2859,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) rcu_read_unlock(); if (unlikely(blocked_rdev)) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + if (s.syncing || s.expanding || s.expanded || + s.to_write || s.written) { + set_bit(STRIPE_HANDLE, &sh->state); + goto unlock; + } + /* There is nothing for the blocked_rdev to block */ + rdev_dec_pending(blocked_rdev, conf->mddev); + blocked_rdev = NULL; } + pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, @@ -4446,6 +4459,9 @@ static int raid5_check_reshape(mddev_t *mddev) return -EINVAL; /* Cannot shrink array or change level yet */ if (mddev->delta_disks == 0) return 0; /* nothing to do */ + if (mddev->bitmap) + /* Cannot grow a bitmap yet */ + return -EBUSY; /* Can only proceed if there are plenty of stripe_heads. * We need a minimum of one full stripe,, and for sensible progress