err.no Git - linux-2.6/commitdiff
IB/ipath: Fix SDMA error recovery in absence of link status change
author: John Gregor <john.gregor@qlogic.com>
Wed, 7 May 2008 18:01:10 +0000 (11:01 -0700)
committer: Roland Dreier <rolandd@cisco.com>
Wed, 7 May 2008 18:01:10 +0000 (11:01 -0700)
What's fixed:

    in ipath_cancel_sends()

        We need to unconditionally set ABORTING.  So, swap the tests
        so the set_bit() isn't shadowed by the &&.

        If we've disarmed the piobufs, then we need to unconditionally
        set DISARMED.  So, move it out from the overly protective if
        at the bottom.

    in sdma_abort_task()

        Abort_task was written knowing that the SDMA engine would always
        be reset (and restarted) on error.  A recent change broke that
        fundamental assumption by taking the restart portion and making
        it conditional on a link status change.  But, SDMA can go boom
        without a link status change in some conditions.

Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_sdma.c

index 2036d38fac477a83bdd616f2034dc51187558a72..ce7b7c34360eae4b1ec985a07f8062899b1242cc 100644 (file)
@@ -1898,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 
                spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
                skip_cancel =
-                       !test_bit(IPATH_SDMA_DISABLED, statp) &&
-                       test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+                       test_and_set_bit(IPATH_SDMA_ABORTING, statp)
+                       && !test_bit(IPATH_SDMA_DISABLED, statp);
                spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
                if (skip_cancel)
                        goto bail;
@@ -1930,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
        ipath_disarm_piobufs(dd, 0,
                dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
 
+       if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+               set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+
        if (restore_sendctrl) {
                /* else done by caller later if needed */
                spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -1949,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
                /* only wait so long for intr */
                dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
                dd->ipath_sdma_reset_wait = 200;
-               __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
                if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
                        tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
                spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
index 0d07682c7310f8da67ae087817c27bb1fe7e64e1..3697449c1ba4e7042d89f34ac334b66ff4872e2a 100644 (file)
@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
                spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
 
                /*
-                * Don't restart sdma here. Wait until link is up to ACTIVE.
-                * VL15 MADs used to bring the link up use PIO, and multiple
-                * link transitions otherwise cause the sdma engine to be
+                * Don't restart sdma here (with the exception
+                * below). Wait until link is up to ACTIVE.  VL15 MADs
+                * used to bring the link up use PIO, and multiple link
+                * transitions otherwise cause the sdma engine to be
                 * stopped and started multiple times.
-                * The disable is done here, including the shadow, so the
-                * state is kept consistent.
-                * See ipath_restart_sdma() for the actual starting of sdma.
+                * The disable is done here, including the shadow,
+                * so the state is kept consistent.
+                * See ipath_restart_sdma() for the actual starting
+                * of sdma.
                 */
                spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
                dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
                /* make sure I see next message */
                dd->ipath_sdma_abort_jiffies = 0;
 
+               /*
+                * Not everything that takes SDMA offline is a link
+                * status change.  If the link was up, restart SDMA.
+                */
+               if (dd->ipath_flags & IPATH_LINKACTIVE)
+                       ipath_restart_sdma(dd);
+
                goto done;
        }
 
@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
                goto done;
        }
 
-       dd->ipath_sdma_status = 0;
+       /*
+        * Set initial status as if we had been up, then gone down.
+        * This lets initial start on transition to ACTIVE be the
+        * same as restart after link flap.
+        */
+       dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
        dd->ipath_sdma_abort_jiffies = 0;
        dd->ipath_sdma_generation = 0;
        dd->ipath_sdma_descq_tail = 0;
@@ -618,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
        ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
        spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
+       /* notify upper layers */
+       ipath_ib_piobufavail(dd->verbs_dev);
+
 bail:
        return;
 }