[PATCH] md: Allow stripes to be expanded in preparation for expanding an array

author NeilBrown <neilb@suse.de>

Mon, 27 Mar 2006 09:18:07 +0000 (01:18 -0800)

committer Linus Torvalds <torvalds@g5.osdl.org>

Mon, 27 Mar 2006 16:45:01 +0000 (08:45 -0800)
author NeilBrown <neilb@suse.de>
Mon, 27 Mar 2006 09:18:07 +0000 (01:18 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Mon, 27 Mar 2006 16:45:01 +0000 (08:45 -0800)
diff --git a/drivers/md/md.c b/drivers/md/md.c

index a3ecaf8ed30a5ee6207acac842148de8503a290d..c7b7656f9aa5cd4e98ad9999b32d8ddb44199838 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev)
   */
  static void autorun_devices(int part)
  {
-       struct list_head candidates;
         struct list_head *tmp;
         mdk_rdev_t *rdev0, *rdev;
         mddev_t *mddev;
@@ -2784,6 +2783,7 @@ static void autorun_devices(int part)
         printk(KERN_INFO "md: autorun ...\n");
         while (!list_empty(&pending_raid_disks)) {
                 dev_t dev;
+               LIST_HEAD(candidates);
                 rdev0 = list_entry(pending_raid_disks.next,
                                          mdk_rdev_t, same_set);
  
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 03f31379cebb0cb965ffce503467e1fae9234157..6c20b44509d875bb1fd5557d4b5670c829ac52a3 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num)
         kmem_cache_t *sc;
         int devs = conf->raid_disks;
  
-       sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
-
-       sc = kmem_cache_create(conf->cache_name, 
+       sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
+       sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+       conf->active_name = 0;
+       sc = kmem_cache_create(conf->cache_name[conf->active_name],
                                sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
                                0, 0, NULL, NULL);
         if (!sc)
                 return 1;
         conf->slab_cache = sc;
+       conf->pool_size = devs;
         while (num--) {
                 if (!grow_one_stripe(conf))
                         return 1;
         }
         return 0;
  }
+static int resize_stripes(raid5_conf_t *conf, int newsize)
+{
+       /* Make all the stripes able to hold 'newsize' devices.
+        * New slots in each stripe get 'page' set to a new page.
+        *
+        * This happens in stages:
+        * 1/ create a new kmem_cache and allocate the required number of
+        *    stripe_heads.
+        * 2/ gather all the old stripe_heads and transfer the pages across
+        *    to the new stripe_heads.  This will have the side effect of
+        *    freezing the array as once all stripe_heads have been collected,
+        *    no IO will be possible.  Old stripe heads are freed once their
+        *    pages have been transferred over, and the old kmem_cache is
+        *    freed when all stripes are done.
+        * 3/ reallocate conf->disks to be suitably bigger.  If this fails,
+        *    we simply return a failure status - no need to clean anything up.
+        * 4/ allocate new pages for the new slots in the new stripe_heads.
+        *    If this fails, we don't bother trying to shrink the
+        *    stripe_heads down again, we just leave them as they are.
+        *    As each stripe_head is processed the new one is released into
+        *    active service.
+        *
+        * Once step2 is started, we cannot afford to wait for a write,
+        * so we use GFP_NOIO allocations.
+        */
+       struct stripe_head *osh, *nsh;
+       LIST_HEAD(newstripes);
+       struct disk_info *ndisks;
+       int err = 0;
+       kmem_cache_t *sc;
+       int i;
+
+       if (newsize <= conf->pool_size)
+               return 0; /* never bother to shrink */
+
+       /* Step 1 */
+       sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+                              sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+                              0, 0, NULL, NULL);
+       if (!sc)
+               return -ENOMEM;
+
+       for (i = conf->max_nr_stripes; i; i--) {
+               nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+               if (!nsh)
+                       break;
+
+               memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
+
+               nsh->raid_conf = conf;
+               spin_lock_init(&nsh->lock);
+
+               list_add(&nsh->lru, &newstripes);
+       }
+       if (i) {
+               /* didn't get enough, give up */
+               while (!list_empty(&newstripes)) {
+                       nsh = list_entry(newstripes.next, struct stripe_head, lru);
+                       list_del(&nsh->lru);
+                       kmem_cache_free(sc, nsh);
+               }
+               kmem_cache_destroy(sc);
+               return -ENOMEM;
+       }
+       /* Step 2 - Must use GFP_NOIO now.
+        * OK, we have enough stripes, start collecting inactive
+        * stripes and copying them over
+        */
+       list_for_each_entry(nsh, &newstripes, lru) {
+               spin_lock_irq(&conf->device_lock);
+               wait_event_lock_irq(conf->wait_for_stripe,
+                                   !list_empty(&conf->inactive_list),
+                                   conf->device_lock,
+                                   unplug_slaves(conf->mddev);
+                       );
+               osh = get_free_stripe(conf);
+               spin_unlock_irq(&conf->device_lock);
+               atomic_set(&nsh->count, 1);
+               for(i=0; i<conf->pool_size; i++)
+                       nsh->dev[i].page = osh->dev[i].page;
+               for( ; i<newsize; i++)
+                       nsh->dev[i].page = NULL;
+               kmem_cache_free(conf->slab_cache, osh); /* its page pointers were copied into nsh above */
+       }
+       kmem_cache_destroy(conf->slab_cache);
+
+       /* Step 3.
+        * At this point, we are holding all the stripes so the array
+        * is completely stalled, so now is a good time to resize
+        * conf->disks.
+        */
+       ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+       if (ndisks) {
+               for (i=0; i<conf->raid_disks; i++)
+                       ndisks[i] = conf->disks[i];
+               kfree(conf->disks);
+               conf->disks = ndisks;
+       } else
+               err = -ENOMEM;
+
+       /* Step 4, return new stripes to service */
+       while(!list_empty(&newstripes)) {
+               nsh = list_entry(newstripes.next, struct stripe_head, lru);
+               list_del_init(&nsh->lru);
+               for (i=conf->raid_disks; i < newsize; i++) /* NOTE(review): step 2 filled slots below pool_size, this scan starts at raid_disks - confirm they agree */
+                       if (nsh->dev[i].page == NULL) {
+                               struct page *p = alloc_page(GFP_NOIO);
+                               nsh->dev[i].page = p;
+                               if (!p)
+                                       err = -ENOMEM;
+                       }
+               release_stripe(nsh);
+       }
+       /* critical section passed, GFP_NOIO no longer needed */
+
+       conf->slab_cache = sc;
+       conf->active_name = 1-conf->active_name;
+       conf->pool_size = newsize; /* assigned even when err is non-zero */
+       return err;
+}
+
  
  static int drop_one_stripe(raid5_conf_t *conf)
  {
@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf)
                 return 0;
         if (atomic_read(&sh->count))
                 BUG();
-       shrink_buffers(sh, conf->raid_disks);
+       shrink_buffers(sh, conf->pool_size);
         kmem_cache_free(conf->slab_cache, sh);
         atomic_dec(&conf->active_stripes);
         return 1;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c

index c7632f6cc48718ceeea0f17d65325f4ca241fb73..6df4930fddecae952d7e5e07c165a97877d12825 100644 (file)
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *conf, int num)
         kmem_cache_t *sc;
         int devs = conf->raid_disks;
  
-       sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
+       sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev));
  
-       sc = kmem_cache_create(conf->cache_name,
+       sc = kmem_cache_create(conf->cache_name[0],
                                sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
                                0, 0, NULL, NULL);
         if (!sc)
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h

index 94dbdd406f1210e75cd4544f276a81d2490c1c63..b7b2653af7bb06e898a728266810b7c216ee836b 100644 (file)
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -216,7 +216,11 @@ struct raid5_private_data {
         struct list_head        bitmap_list; /* stripes delaying awaiting bitmap update */
         atomic_t                preread_active_stripes; /* stripes with scheduled io */
  
-       char                    cache_name[20];
+       /* unfortunately we need two cache names as we temporarily have
+        * two caches.
+        */
+       int                     active_name;
+       char                    cache_name[2][20];
         kmem_cache_t            *slab_cache; /* for allocating stripes */
  
         int                     seq_flush, seq_write;
@@ -238,7 +242,8 @@ struct raid5_private_data {
         wait_queue_head_t       wait_for_overlap;
         int                     inactive_blocked;       /* release of inactive stripes blocked,
                                                          * waiting for 25% to be free
-                                                        */        
+                                                        */
+       int                     pool_size; /* number of disks in stripeheads in pool */
         spinlock_t              device_lock;
         struct disk_info        *disks;
  };
author	NeilBrown <neilb@suse.de>
	Mon, 27 Mar 2006 09:18:07 +0000 (01:18 -0800)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Mon, 27 Mar 2006 16:45:01 +0000 (08:45 -0800)
drivers/md/md.c		patch \| blob \| history
drivers/md/raid5.c		patch \| blob \| history
drivers/md/raid6main.c		patch \| blob \| history
include/linux/raid/raid5.h		patch \| blob \| history