md: allow a maximum extent to be set for resyncing

author NeilBrown <neilb@suse.de>

Wed, 6 Feb 2008 09:39:52 +0000 (01:39 -0800)

committer Linus Torvalds <torvalds@woody.linux-foundation.org>

Wed, 6 Feb 2008 18:41:18 +0000 (10:41 -0800)
author NeilBrown <neilb@suse.de>
Wed, 6 Feb 2008 09:39:52 +0000 (01:39 -0800)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Wed, 6 Feb 2008 18:41:18 +0000 (10:41 -0800)
diff --git a/Documentation/md.txt b/Documentation/md.txt

index 5818628207b5ec8b5f32ed6d727857f5f6dcbbec..396cdd982c26505ee39a577a64a7fb2c7472985e 100644 (file)
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -416,6 +416,16 @@ also have
       sectors in total that could need to be processed.  The two
       numbers are separated by a '/'  thus effectively showing one
       value, a fraction of the process that is complete.
+     A 'select' on this attribute will return when resync completes,
+     when it reaches the current sync_max (below) and possibly at
+     other times.
+
+   sync_max
+     This is the sector number at which a resync/recovery process
+     will pause.  While a resync is active, the value can only be
+     increased, never decreased.  Writing 'max' effectively
+     disables the limit.
+
  
     sync_speed
       This shows the current actual speed, in K/sec, of the current
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 00788c56276ff1f5db5b2250faf0abeadabb16c1..79eb63fdb4b3a37ad6707d696f434bddef463873 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
         spin_lock_init(&new->write_lock);
         init_waitqueue_head(&new->sb_wait);
         new->reshape_position = MaxSector;
+       new->resync_max = MaxSector;
  
         new->queue = blk_alloc_queue(GFP_KERNEL);
         if (!new->queue) {
@@ -2920,6 +2921,43 @@ sync_completed_show(mddev_t *mddev, char *page)
  
  static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
  
+static ssize_t /* sysfs show handler for the 'sync_max' attribute (see md_max_sync) */
+max_sync_show(mddev_t *mddev, char *page)
+{
+       if (mddev->resync_max == MaxSector) /* MaxSector is reported as the literal "max" */
+               return sprintf(page, "max\n");
+       else /* otherwise print resync_max as a decimal number */
+               return sprintf(page, "%llu\n",
+                              (unsigned long long)mddev->resync_max);
+}
+static ssize_t /* sysfs store handler for 'sync_max'; returns len on success, -EINVAL or -EBUSY on rejection */
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       if (strncmp(buf, "max", 3) == 0) /* only the first 3 chars are compared, so any input starting with "max" matches */
+               mddev->resync_max = MaxSector;
+       else {
+               char *ep;
+               unsigned long long max = simple_strtoull(buf, &ep, 10);
+               if (ep == buf || (*ep != 0 && *ep != '\n')) /* no digits parsed, or trailing characters other than NUL/newline */
+                       return -EINVAL;
+               if (max < mddev->resync_max && /* refuse to lower the limit while MD_RECOVERY_RUNNING is set */
+                   test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+                       return -EBUSY;
+
+               /* Must be a multiple of chunk_size (mask test presumes a power-of-two chunk -- TODO confirm) */
+               if (mddev->chunk_size) {
+                       if (max & (sector_t)((mddev->chunk_size>>9)-1))
+                               return -EINVAL;
+               }
+               mddev->resync_max = max;
+       }
+       wake_up(&mddev->recovery_wait); /* md_do_sync() waits on recovery_wait until resync_max exceeds its position */
+       return len;
+}
+
+static struct md_sysfs_entry md_max_sync = /* the 'sync_max' sysfs file: mode S_IRUGO|S_IWUSR, backed by the two handlers above */
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
+
  static ssize_t
  suspend_lo_show(mddev_t *mddev, char *page)
  {
@@ -3030,6 +3068,7 @@ static struct attribute *md_redundancy_attrs[] = {
         &md_sync_max.attr,
         &md_sync_speed.attr,
         &md_sync_completed.attr,
+       &md_max_sync.attr,
         &md_suspend_lo.attr,
         &md_suspend_hi.attr,
         &md_bitmap.attr,
@@ -3579,6 +3618,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
                 mddev->size = 0;
                 mddev->raid_disks = 0;
                 mddev->recovery_cp = 0;
+               mddev->resync_max = MaxSector;
                 mddev->reshape_position = MaxSector;
                 mddev->external = 0;
  
@@ -5443,8 +5483,16 @@ void md_do_sync(mddev_t *mddev)
                 sector_t sectors;
  
                 skipped = 0;
+               if (j >= mddev->resync_max) {
+                       sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+                       wait_event(mddev->recovery_wait,
+                                  mddev->resync_max > j
+                                  || kthread_should_stop());
+               }
+               if (kthread_should_stop())
+                       goto interrupted;
                 sectors = mddev->pers->sync_request(mddev, j, &skipped,
-                                           currspeed < speed_min(mddev));
+                                                 currspeed < speed_min(mddev));
                 if (sectors == 0) {
                         set_bit(MD_RECOVERY_ERR, &mddev->recovery);
                         goto out;
@@ -5486,15 +5534,9 @@ void md_do_sync(mddev_t *mddev)
                 }
  
  
-               if (kthread_should_stop()) {
-                       /*
-                        * got a signal, exit.
-                        */
-                       printk(KERN_INFO 
-                               "md: md_do_sync() got signal ... exiting\n");
-                       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                       goto out;
-               }
+               if (kthread_should_stop())
+                       goto interrupted;
+
  
                 /*
                  * this loop exits only if either when we are slower than
@@ -5558,9 +5600,22 @@ void md_do_sync(mddev_t *mddev)
  
   skip:
         mddev->curr_resync = 0;
+       mddev->resync_max = MaxSector;
+       sysfs_notify(&mddev->kobj, NULL, "sync_completed");
         wake_up(&resync_wait);
         set_bit(MD_RECOVERY_DONE, &mddev->recovery);
         md_wakeup_thread(mddev->thread);
+       return;
+
+ interrupted:
+       /*
+        * got a signal, exit.
+        */
+       printk(KERN_INFO
+              "md: md_do_sync() got signal ... exiting\n");
+       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+       goto out;
+
  }
  EXPORT_SYMBOL_GPL(md_do_sync);
  
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index e0b8d0dd7a87b441b0b19d9fff24ec18467905eb..ae7c15207df5034d2da1e33935f64893a17a1ab7 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1767,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 return rv;
         }
  
+       if (max_sector > mddev->resync_max)
+               max_sector = mddev->resync_max; /* Don't do IO beyond here */
         nr_sectors = 0;
         sync_blocks = 0;
         do {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index ba125277c6c4d71020f8c98269ccf5fefe916893..d6f12882424d96c8b6512f8825eed6f4ba9358a6 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 return (max_sector - sector_nr) + sectors_skipped;
         }
  
+       if (max_sector > mddev->resync_max)
+               max_sector = mddev->resync_max; /* Don't do IO beyond here */
+
         /* make sure whole request will fit in a chunk - if chunks
          * are meaningful
          */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 388a974d63ef9aed767329c989c1849c431403c8..e946de6f46bc6431690d8443dee9fb998244ea96 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3698,6 +3698,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                 release_stripe(sh);
                 first_sector += STRIPE_SECTORS;
         }
+       /* If this takes us to the resync_max point where we have to pause,
+        * then we need to write out the superblock.
+        */
+       sector_nr += conf->chunk_size>>9;
+       if (sector_nr >= mddev->resync_max) {
+               /* Cannot proceed until we've updated the superblock... */
+               wait_event(conf->wait_for_overlap,
+                          atomic_read(&conf->reshape_stripes) == 0);
+               mddev->reshape_position = conf->expand_progress;
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               md_wakeup_thread(mddev->thread);
+               wait_event(mddev->sb_wait,
+                          !test_bit(MD_CHANGE_DEVS, &mddev->flags)
+                          || kthread_should_stop());
+               spin_lock_irq(&conf->device_lock);
+               conf->expand_lo = mddev->reshape_position;
+               spin_unlock_irq(&conf->device_lock);
+               wake_up(&conf->wait_for_overlap);
+       }
         return conf->chunk_size>>9;
  }
  
@@ -3734,6 +3753,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 return reshape_request(mddev, sector_nr, skipped);
  
+       /* No need to check resync_max as we never do more than one
+        * stripe, and as resync_max will always be on a chunk boundary,
+        * if the check in md_do_sync didn't fire, there is no chance
+        * of overstepping resync_max here
+        */
+
         /* if there is too many failed drives and we are trying
          * to resync, then assert that we are finished, because there is
          * nothing we can do.
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h

index b579cc628303309c0fcb57ae332fd1fe0561805e..c77dca3221ed5891dcb6cae277b8470f487ab5a5 100644 (file)
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -219,6 +219,8 @@ struct mddev_s
         atomic_t                        recovery_active; /* blocks scheduled, but not written */
         wait_queue_head_t               recovery_wait;
         sector_t                        recovery_cp;
+       sector_t                        resync_max;     /* resync should pause
+                                                        * when it gets here */
  
         spinlock_t                      write_lock;
         wait_queue_head_t               sb_wait;        /* for waiting on superblock updates */
author	NeilBrown <neilb@suse.de>
	Wed, 6 Feb 2008 09:39:52 +0000 (01:39 -0800)
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>
	Wed, 6 Feb 2008 18:41:18 +0000 (10:41 -0800)
Documentation/md.txt		patch | blob | history
drivers/md/md.c		patch | blob | history
drivers/md/raid1.c		patch | blob | history
drivers/md/raid10.c		patch | blob | history
drivers/md/raid5.c		patch | blob | history
include/linux/raid/md_k.h		patch | blob | history