Merge branches 'release', 'bugzilla-6217', 'bugzilla-6629', 'bugzilla-6933', 'bugzill...

[linux-2.6] / drivers / md / raid5.c
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index 82af3465a900883061c69d6f2b52549fdb3e6e02..2d6f1a51359cc490dcfd4f9b6bc5b1add12cf9a0 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -688,7 +688,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
  }
  
  static struct dma_async_tx_descriptor *
-ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+                unsigned long pending)
  {
         int disks = sh->disks;
         int pd_idx = sh->pd_idx, i;
@@ -696,7 +697,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
         /* check if prexor is active which means only process blocks
          * that are part of a read-modify-write (Wantprexor)
          */
-       int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+       int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
  
         pr_debug("%s: stripe %llu\n", __FUNCTION__,
                 (unsigned long long)sh->sector);
@@ -773,7 +774,8 @@ static void ops_complete_write(void *stripe_head_ref)
  }
  
  static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+               unsigned long pending)
  {
         /* kernel stack size limits the total number of disks */
         int disks = sh->disks;
@@ -781,7 +783,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
  
         int count = 0, pd_idx = sh->pd_idx, i;
         struct page *xor_dest;
-       int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+       int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
         unsigned long flags;
         dma_async_tx_callback callback;
  
@@ -808,7 +810,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
         }
  
         /* check whether this postxor is part of a write */
-       callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
+       callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
                 ops_complete_write : ops_complete_postxor;
  
         /* 1/ if we prexor'd then the dest is reused as a source
@@ -896,12 +898,12 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
                 tx = ops_run_prexor(sh, tx);
  
         if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
-               tx = ops_run_biodrain(sh, tx);
+               tx = ops_run_biodrain(sh, tx, pending);
                 overlap_clear++;
         }
  
         if (test_bit(STRIPE_OP_POSTXOR, &pending))
-               ops_run_postxor(sh, tx);
+               ops_run_postxor(sh, tx, pending);
  
         if (test_bit(STRIPE_OP_CHECK, &pending))
                 ops_run_check(sh);
@@ -2863,7 +2865,8 @@ static void handle_stripe5(struct stripe_head *sh)
                 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
         }
  
-       if (s.expanding && s.locked == 0)
+       if (s.expanding && s.locked == 0 &&
+           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
                 handle_stripe_expansion(conf, sh, NULL);
  
         if (sh->ops.count)
@@ -3065,7 +3068,8 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
         }
  
-       if (s.expanding && s.locked == 0)
+       if (s.expanding && s.locked == 0 &&
+           !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
                 handle_stripe_expansion(conf, sh, &r6s);
  
         spin_unlock(&sh->lock);
@@ -3155,7 +3159,8 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
                                 atomic_inc(&conf->preread_active_stripes);
                         list_add_tail(&sh->lru, &conf->handle_list);
                 }
-       }
+       } else
+               blk_plug_device(conf->mddev->queue);
  }
  
  static void activate_bit_delay(raid5_conf_t *conf)
@@ -3186,8 +3191,7 @@ static void unplug_slaves(mddev_t *mddev)
                         atomic_inc(&rdev->nr_pending);
                         rcu_read_unlock();
  
-                       if (r_queue->unplug_fn)
-                               r_queue->unplug_fn(r_queue);
+                       blk_unplug(r_queue);
  
                         rdev_dec_pending(rdev, mddev);
                         rcu_read_lock();
@@ -3546,7 +3550,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
                                 goto retry;
                         }
                         finish_wait(&conf->wait_for_overlap, &w);
-                       handle_stripe(sh, NULL);
+                       set_bit(STRIPE_HANDLE, &sh->state);
+                       clear_bit(STRIPE_DELAYED, &sh->state);
                         release_stripe(sh);
                 } else {
                         /* cannot get stripe for read-ahead, just give-up */
@@ -3695,6 +3700,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                 release_stripe(sh);
                 first_sector += STRIPE_SECTORS;
         }
+       /* If this takes us to the resync_max point where we have to pause,
+        * then we need to write out the superblock.
+        */
+       sector_nr += conf->chunk_size>>9;
+       if (sector_nr >= mddev->resync_max) {
+               /* Cannot proceed until we've updated the superblock... */
+               wait_event(conf->wait_for_overlap,
+                          atomic_read(&conf->reshape_stripes) == 0);
+               mddev->reshape_position = conf->expand_progress;
+               set_bit(MD_CHANGE_DEVS, &mddev->flags);
+               md_wakeup_thread(mddev->thread);
+               wait_event(mddev->sb_wait,
+                          !test_bit(MD_CHANGE_DEVS, &mddev->flags)
+                          || kthread_should_stop());
+               spin_lock_irq(&conf->device_lock);
+               conf->expand_lo = mddev->reshape_position;
+               spin_unlock_irq(&conf->device_lock);
+               wake_up(&conf->wait_for_overlap);
+       }
         return conf->chunk_size>>9;
  }
  
@@ -3731,6 +3755,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 return reshape_request(mddev, sector_nr, skipped);
  
+       /* No need to check resync_max here: we never do more than one
+        * stripe, and resync_max will always be on a chunk boundary,
+        * so if the check in md_do_sync didn't fire, there is no chance
+        * of overstepping resync_max.
+        */
+
         /* if there is too many failed drives and we are trying
          * to resync, then assert that we are finished, because there is
          * nothing we can do.
@@ -3750,6 +3780,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
                 return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */
         }
  
+
+       bitmap_cond_end_sync(mddev->bitmap, sector_nr);
+
         pd_idx = stripe_to_pdidx(sector_nr, conf, raid_disks);
         sh = get_active_stripe(conf, sector_nr, raid_disks, pd_idx, 1);
         if (sh == NULL) {
@@ -3861,7 +3894,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
   * During the scan, completed stripes are saved for us by the interrupt
   * handler, so that they will not have to wait for our next wakeup.
   */
-static void raid5d (mddev_t *mddev)
+static void raid5d(mddev_t *mddev)
  {
         struct stripe_head *sh;
         raid5_conf_t *conf = mddev_to_conf(mddev);
@@ -3886,12 +3919,6 @@ static void raid5d (mddev_t *mddev)
                         activate_bit_delay(conf);
                 }
  
-               if (list_empty(&conf->handle_list) &&
-                   atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
-                   !blk_queue_plugged(mddev->queue) &&
-                   !list_empty(&conf->delayed_list))
-                       raid5_activate_delayed(conf);
-
                 while ((bio = remove_bio_from_retry(conf))) {
                         int ok;
                         spin_unlock_irq(&conf->device_lock);
@@ -4105,7 +4132,7 @@ static int run(mddev_t *mddev)
  
         pr_debug("raid5: run(%s) called.\n", mdname(mddev));
  
-       ITERATE_RDEV(mddev,rdev,tmp) {
+       rdev_for_each(rdev, tmp, mddev) {
                 raid_disk = rdev->raid_disk;
                 if (raid_disk >= conf->raid_disks
                     || raid_disk < 0)
@@ -4518,7 +4545,7 @@ static int raid5_start_reshape(mddev_t *mddev)
         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 return -EBUSY;
  
-       ITERATE_RDEV(mddev, rdev, rtmp)
+       rdev_for_each(rdev, rtmp, mddev)
                 if (rdev->raid_disk < 0 &&
                     !test_bit(Faulty, &rdev->flags))
                         spares++;
@@ -4540,7 +4567,7 @@ static int raid5_start_reshape(mddev_t *mddev)
         /* Add some new drives, as many as will fit.
          * We know there are enough to make the newly sized array work.
          */
-       ITERATE_RDEV(mddev, rdev, rtmp)
+       rdev_for_each(rdev, rtmp, mddev)
                 if (rdev->raid_disk < 0 &&
                     !test_bit(Faulty, &rdev->flags)) {
                         if (raid5_add_disk(mddev, rdev)) {