[SCSI] fc_transport: make all rports wait dev_loss_tmo before removing them

author James Smart <James.Smart@Emulex.Com>

Fri, 27 Apr 2007 15:53:17 +0000 (11:53 -0400)

committer James Bottomley <jejb@mulgrave.il.steeleye.com>

Sun, 6 May 2007 14:33:20 +0000 (09:33 -0500)
author James Smart <James.Smart@Emulex.Com>
Fri, 27 Apr 2007 15:53:17 +0000 (11:53 -0400)
committer James Bottomley <jejb@mulgrave.il.steeleye.com>
Sun, 6 May 2007 14:33:20 +0000 (09:33 -0500)
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c

index 14c4f065b2b8091c88c853bd7cbc732e027f160d..b4d1ece46f789d92e6cc94acdb38410625852233 100644 (file)
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1718,31 +1718,12 @@ fc_starget_delete(struct work_struct *work)
         struct fc_rport *rport =
                 container_of(work, struct fc_rport, stgt_delete_work);
         struct Scsi_Host *shost = rport_to_shost(rport);
-       unsigned long flags;
         struct fc_internal *i = to_fc_internal(shost->transportt);
  
-       /*
-        * Involve the LLDD if possible. All io on the rport is to
-        * be terminated, either as part of the dev_loss_tmo callback
-        * processing, or via the terminate_rport_io function.
-        */
-       if (i->f->dev_loss_tmo_callbk)
-               i->f->dev_loss_tmo_callbk(rport);
-       else if (i->f->terminate_rport_io)
+       /* Involve the LLDD if possible to terminate all io on the rport. */
+       if (i->f->terminate_rport_io)
                 i->f->terminate_rport_io(rport);
  
-       spin_lock_irqsave(shost->host_lock, flags);
-       if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
-               spin_unlock_irqrestore(shost->host_lock, flags);
-               if (!cancel_delayed_work(&rport->fail_io_work))
-                       fc_flush_devloss(shost);
-               if (!cancel_delayed_work(&rport->dev_loss_work))
-                       fc_flush_devloss(shost);
-               spin_lock_irqsave(shost->host_lock, flags);
-               rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
-       }
-       spin_unlock_irqrestore(shost->host_lock, flags);
-
         scsi_remove_target(&rport->dev);
  }
  
@@ -1760,6 +1741,7 @@ fc_rport_final_delete(struct work_struct *work)
         struct device *dev = &rport->dev;
         struct Scsi_Host *shost = rport_to_shost(rport);
         struct fc_internal *i = to_fc_internal(shost->transportt);
+       unsigned long flags;
  
         /*
          * if a scan is pending, flush the SCSI Host work_q so that 
@@ -1768,13 +1750,37 @@ fc_rport_final_delete(struct work_struct *work)
         if (rport->flags & FC_RPORT_SCAN_PENDING)
                 scsi_flush_work(shost);
  
+       /* involve the LLDD to terminate all pending i/o */
+       if (i->f->terminate_rport_io)
+               i->f->terminate_rport_io(rport);
+
+       /*
+        * Cancel any outstanding timers. These should really exist
+        * only when rmmod'ing the LLDD and we're asking for
+        * immediate termination of the rports
+        */
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
+               spin_unlock_irqrestore(shost->host_lock, flags);
+               if (!cancel_delayed_work(&rport->fail_io_work))
+                       fc_flush_devloss(shost);
+               if (!cancel_delayed_work(&rport->dev_loss_work))
+                       fc_flush_devloss(shost);
+               spin_lock_irqsave(shost->host_lock, flags);
+               rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
+       }
+       spin_unlock_irqrestore(shost->host_lock, flags);
+
         /* Delete SCSI target and sdevs */
         if (rport->scsi_target_id != -1)
                 fc_starget_delete(&rport->stgt_delete_work);
-       else if (i->f->dev_loss_tmo_callbk)
+
+       /*
+        * Notify the driver that the rport is now dead. The LLDD will
+        * also guarantee that any communication to the rport is terminated
+        */
+       if (i->f->dev_loss_tmo_callbk)
                 i->f->dev_loss_tmo_callbk(rport);
-       else if (i->f->terminate_rport_io)
-               i->f->terminate_rport_io(rport);
  
         transport_remove_device(dev);
         device_del(dev);
@@ -1963,8 +1969,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
                         }
  
                         if (match) {
-                               struct delayed_work *work =
-                                                       &rport->dev_loss_work;
  
                                 memcpy(&rport->node_name, &ids->node_name,
                                         sizeof(rport->node_name));
@@ -1982,46 +1986,61 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
                                                 fci->f->dd_fcrport_size);
  
                                 /*
-                                * If we were blocked, we were a target.
-                                * If no longer a target, we leave the timer
-                                * running in case the port changes roles
-                                * prior to the timer expiring. If the timer
-                                * fires, the target will be torn down.
+                                * If we were not a target, cancel the
+                                * io terminate and rport timers, and
+                                * we're done.
+                                *
+                                * If we were a target, but our new role
+                                * doesn't indicate a target, leave the
+                                * timers running expecting the role to
+                                * change as the target fully logs in. If
+                                * it doesn't, the target will be torn down.
+                                *
+                                * If we were a target, and our role shows
+                                * we're still a target, cancel the timers
+                                * and kick off a scan.
                                  */
-                               if (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET))
-                                       return rport;
  
-                               /* restart the target */
+                               /* was a target, not in roles */
+                               if ((rport->scsi_target_id != -1) &&
+                                   (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET)))
+                                       return rport;
  
                                 /*
-                                * Stop the target timers first. Take no action
-                                * on the del_timer failure as the state
-                                * machine state change will validate the
-                                * transaction.
+                                * Stop the fail io and dev_loss timers.
+                                * If they flush, the port_state will
+                                * be checked and will NOOP the function.
                                  */
                                 if (!cancel_delayed_work(&rport->fail_io_work))
                                         fc_flush_devloss(shost);
-                               if (!cancel_delayed_work(work))
+                               if (!cancel_delayed_work(&rport->dev_loss_work))
                                         fc_flush_devloss(shost);
  
                                 spin_lock_irqsave(shost->host_lock, flags);
  
                                 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
  
-                               /* initiate a scan of the target */
-                               rport->flags |= FC_RPORT_SCAN_PENDING;
-                               scsi_queue_work(shost, &rport->scan_work);
-
-                               spin_unlock_irqrestore(shost->host_lock, flags);
-
-                               scsi_target_unblock(&rport->dev);
+                               /* if target, initiate a scan */
+                               if (rport->scsi_target_id != -1) {
+                                       rport->flags |= FC_RPORT_SCAN_PENDING;
+                                       scsi_queue_work(shost,
+                                                       &rport->scan_work);
+                                       spin_unlock_irqrestore(shost->host_lock,
+                                                       flags);
+                                       scsi_target_unblock(&rport->dev);
+                               } else
+                                       spin_unlock_irqrestore(shost->host_lock,
+                                                       flags);
  
                                 return rport;
                         }
                 }
         }
  
-       /* Search the bindings array */
+       /*
+        * Search the bindings array
+        * Note: if never a FCP target, you won't be on this list
+        */
         if (fc_host->tgtid_bind_type != FC_TGTID_BIND_NONE) {
  
                 /* search for a matching consistent binding */
@@ -2158,15 +2177,24 @@ fc_remote_port_delete(struct fc_rport  *rport)
  
         spin_lock_irqsave(shost->host_lock, flags);
  
-       /* If no scsi target id mapping, delete it */
-       if (rport->scsi_target_id == -1) {
-               list_del(&rport->peers);
-               rport->port_state = FC_PORTSTATE_DELETED;
-               fc_queue_work(shost, &rport->rport_delete_work);
+       if (rport->port_state != FC_PORTSTATE_ONLINE) {
                 spin_unlock_irqrestore(shost->host_lock, flags);
                 return;
         }
  
+       /*
+        * In the past, we if this was not an FCP-Target, we would
+        * unconditionally just jump to deleting the rport.
+        * However, rports can be used as node containers by the LLDD,
+        * and its not appropriate to just terminate the rport at the
+        * first sign of a loss in connectivity. The LLDD may want to
+        * send ELS traffic to re-validate the login. If the rport is
+        * immediately deleted, it makes it inappropriate for a node
+        * container.
+        * So... we now unconditionally wait dev_loss_tmo before
+        * destroying an rport.
+        */
+
         rport->port_state = FC_PORTSTATE_BLOCKED;
  
         rport->flags |= FC_RPORT_DEVLOSS_PENDING;
@@ -2263,11 +2291,11 @@ fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles)
  EXPORT_SYMBOL(fc_remote_port_rolechg);
  
  /**
- * fc_timeout_deleted_rport - Timeout handler for a deleted remote port that
- *                       was a SCSI target (thus was blocked), and failed
- *                       to return in the alloted time.
+ * fc_timeout_deleted_rport - Timeout handler for a deleted remote port,
+ *                     which we blocked, and has now failed to return
+ *                     in the allotted time.
   * 
- * @work:      rport target that failed to reappear in the alloted time.
+ * @work:      rport target that failed to reappear in the allotted time.
   **/
  static void
  fc_timeout_deleted_rport(struct work_struct *work)
@@ -2283,10 +2311,12 @@ fc_timeout_deleted_rport(struct work_struct *work)
         rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
  
         /*
-        * If the port is ONLINE, then it came back. Validate it's still an
-        * FCP target. If not, tear down the scsi_target on it.
+        * If the port is ONLINE, then it came back. If it was a SCSI
+        * target, validate it still is. If not, tear down the
+        * scsi_target on it.
          */
         if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+           (rport->scsi_target_id != -1) &&
             !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
                 dev_printk(KERN_ERR, &rport->dev,
                         "blocked FC remote port time out: no longer"
@@ -2297,18 +2327,24 @@ fc_timeout_deleted_rport(struct work_struct *work)
                 return;
         }
  
+       /* NOOP state - we're flushing workq's */
         if (rport->port_state != FC_PORTSTATE_BLOCKED) {
                 spin_unlock_irqrestore(shost->host_lock, flags);
                 dev_printk(KERN_ERR, &rport->dev,
-                       "blocked FC remote port time out: leaving target alone\n");
+                       "blocked FC remote port time out: leaving"
+                       " rport%s alone\n",
+                       (rport->scsi_target_id != -1) ?  " and starget" : "");
                 return;
         }
  
-       if (fc_host->tgtid_bind_type == FC_TGTID_BIND_NONE) {
+       if ((fc_host->tgtid_bind_type == FC_TGTID_BIND_NONE) ||
+           (rport->scsi_target_id == -1)) {
                 list_del(&rport->peers);
                 rport->port_state = FC_PORTSTATE_DELETED;
                 dev_printk(KERN_ERR, &rport->dev,
-                       "blocked FC remote port time out: removing target\n");
+                       "blocked FC remote port time out: removing"
+                       " rport%s\n",
+                       (rport->scsi_target_id != -1) ?  " and starget" : "");
                 fc_queue_work(shost, &rport->rport_delete_work);
                 spin_unlock_irqrestore(shost->host_lock, flags);
                 return;
author	James Smart <James.Smart@Emulex.Com>
	Fri, 27 Apr 2007 15:53:17 +0000 (11:53 -0400)
committer	James Bottomley <jejb@mulgrave.il.steeleye.com>
	Sun, 6 May 2007 14:33:20 +0000 (09:33 -0500)