#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
+
+/* TID entries (memory), HT-only
+ *
+ * Field layout of one TID entry value: a 40-bit address field starting
+ * at bit 0, a 14-bit buffer-size field positioned by
+ * INFINIPATH_RT_BUFSIZE_SHIFT, and a valid flag in bit 63.
+ * NOTE(review): the buffer-size mask is presumably applied before
+ * shifting -- confirm at the use site.
+ */
+#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
+#define INFINIPATH_RT_VALID 0x8000000000000000ULL /* bit 63 */
+#define INFINIPATH_RT_ADDR_SHIFT 0
+#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL /* 14 bits wide */
+#define INFINIPATH_RT_BUFSIZE_SHIFT 48
+
/*
* masks and bits that are different in different chips, or present only
* in one
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+/* hardware error status bits: parity error in a PIO send buffer or PBC */
+#define TXE_PIO_PARITY \
+	((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+	  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) << \
+	 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+/* hardware error status bit: parity error in the eager TID memory */
+#define RXE_EAGER_PARITY \
+	(INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID << \
+	 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
+
+/* defined later in this file; called from the hardware error handler */
+static int ipath_ht_txe_recover(struct ipath_devdata *dd);
+
/**
* ipath_ht_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
/*
* make sure we get this much out, unless told to be quiet,
+ * it's a parity error we may recover from,
* or it's occurred within the last 5 seconds
*/
- if ((hwerrs & ~(dd->ipath_lasthwerror |
- ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
- (ipath_debug & __IPATH_VERBDBG))
+ if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
+ RXE_EAGER_PARITY)) ||
+ (ipath_debug & __IPATH_VERBDBG))
dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
"(cleared)\n", (unsigned long long) hwerrs);
dd->ipath_lasthwerror |= hwerrs;
(hwerrs & ~dd->ipath_hwe_bitsextant));
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
- if (ctrl & INFINIPATH_C_FREEZEMODE) {
+ if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
/*
* parity errors in send memory are recoverable,
* just cancel the send (if indicated in * sendbuffererror),
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
- if (hwerrs) {
- /*
- * if any set that we aren't ignoring; only
- * make the complaint once, in case it's stuck
- * or recurring, and we get here multiple
- * times.
- */
- if (dd->ipath_flags & IPATH_INITTED) {
- ipath_dev_err(dd, "Fatal Hardware Error (freeze "
- "mode), no longer usable, SN %.16s\n",
- dd->ipath_serial);
- isfatal = 1;
- }
- *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
- /* mark as having had error */
- *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
- /*
- * mark as not usable, at a minimum until driver
- * is reloaded, probably until reboot, since no
- * other reset is possible.
- */
- dd->ipath_flags &= ~IPATH_INITTED;
- } else {
- ipath_dbg("Clearing freezemode on ignored hardware "
- "error\n");
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
+ if (hwerrs & RXE_EAGER_PARITY)
+ ipath_dev_err(dd, "RXE parity, Eager TID error is not "
+ "recoverable\n");
+ if (!hwerrs) {
+ ipath_dbg("Clearing freezemode on ignored or "
+ "recovered hardware error\n");
ctrl &= ~INFINIPATH_C_FREEZEMODE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
ctrl);
dd->ipath_hwerrmask);
}
- ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (hwerrs) {
+ /*
+ * if any set that we aren't ignoring; only
+ * make the complaint once, in case it's stuck
+ * or recurring, and we get here multiple
+ * times.
+ */
+ ipath_dev_err(dd, "%s hardware error\n", msg);
+ if (dd->ipath_flags & IPATH_INITTED) {
+ ipath_dev_err(dd, "Fatal Hardware Error (freeze "
+ "mode), no longer usable, SN %.16s\n",
+ dd->ipath_serial);
+ isfatal = 1;
+ }
+ *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+ /* mark as having had error */
+ *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ /*
+ * mark as not usable, at a minimum until driver
+ * is reloaded, probably until reboot, since no
+ * other reset is possible.
+ */
+ dd->ipath_flags &= ~IPATH_INITTED;
+ }
+ else
+ *msg = 0; /* recovered from all of them */
if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
/*
* for status file; if no trailing brace is copied,
if (n)
snprintf(name, namelen, "%s", n);
- if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
+ if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
+ dd->ipath_minrev > 3)) {
/*
* This version of the driver only supports Rev 3.2 and 3.3
*/
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
+ ipath_dbg("MemBIST corrected\n");
ipath_check_htlink(dd);
u64 __iomem *tidptr, u32 type,
unsigned long pa)
{
+ if (!dd->ipath_kregbase)
+ return;
+
if (pa != dd->ipath_tidinvalid) {
if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
dev_info(&dd->pcidev->dev,
pa |= lenvalid | INFINIPATH_RT_VALID;
}
}
- if (dd->ipath_kregbase)
- writeq(pa, tidptr);
+ writeq(pa, tidptr);
}
+
/**
* ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
* @dd: the infinipath device
INFINIPATH_S_ABORT);
ipath_get_eeprom_info(dd);
- if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
+ if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
/*
* Later production QHT7040 has same changes as QHT7140, so
return 0;
}
+
+/*
+ * Try to recover from a TXE PIO parity error (ipath_ht_ variant).
+ *
+ * Every call bumps ipath_stats.sps_txeparity.  Once that count reaches
+ * IPATH_MAX_PARITY_ATTEMPTS no further recovery is attempted; the
+ * give-up message is printed only on the call that hits the limit.
+ *
+ * Returns 1 after asking ipath_disarm_senderrbufs() to rewrite and
+ * disarm the flagged send buffers, 0 if we gave up.
+ */
+static int ipath_ht_txe_recover(struct ipath_devdata *dd)
+{
+	int attempts = ++ipath_stats.sps_txeparity;
+
+	if (attempts < IPATH_MAX_PARITY_ATTEMPTS) {
+		dev_info(&dd->pcidev->dev,
+			 "Recovering from TXE PIO parity error\n");
+		/* second argument requests a rewrite of each flagged buffer */
+		ipath_disarm_senderrbufs(dd, 1);
+		return 1;
+	}
+
+	/* complain exactly once, when the limit is first reached */
+	if (attempts == IPATH_MAX_PARITY_ATTEMPTS)
+		ipath_dev_err(dd,
+			      "Too many attempts to recover from "
+			      "TXE parity, giving up\n");
+	return 0;
+}
+
+
/**
* ipath_init_ht_get_base_info - set chip-specific flags for user code
* @dd: the infinipath device
INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
};
+/* hardware error status bits: parity error in a PIO send buffer or PBC */
+#define TXE_PIO_PARITY \
+	((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
+	  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) << \
+	 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
+
+/* defined later in this file; called from the hardware error handler */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd);
+
/**
* ipath_pe_handle_hwerrors - display hardware errors.
* @dd: the infinipath device
* occur if a processor speculative read is done to the PIO
* buffer while we are sending a packet, for example.
*/
- if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
- ipath_stats.sps_txeparity++;
- ipath_dbg("Recovering from TXE parity error (%llu), "
- "hwerrstatus=%llx\n",
- (unsigned long long) ipath_stats.sps_txeparity,
- (unsigned long long) hwerrs);
- ipath_disarm_senderrbufs(dd);
- hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
- INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
- << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
- if (!hwerrs) { /* else leave in freeze mode */
- ipath_write_kreg(dd,
- dd->ipath_kregs->kr_control,
- dd->ipath_control);
- return;
- }
- }
+ if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
+ hwerrs &= ~TXE_PIO_PARITY;
if (hwerrs) {
/*
* if any set that we aren't ignoring only make the
if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
ipath_dev_err(dd, "MemBIST did not complete!\n");
+ if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
+ ipath_dbg("MemBIST corrected\n");
val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
dd->ipath_irq = 0;
}
+/*
+ * Try to recover from a TXE PIO parity error (ipath_pe_ variant).
+ *
+ * On platforms using this chip that do not have ordered WC stores,
+ * speculative reads of the PIO buffers can cause TXE parity errors,
+ * and a chip bug can then produce (many) false parity error reports.
+ * Those platforms get only a debug print, and there is no upper limit
+ * on recovery attempts.  Where the speculative reads don't occur, each
+ * recovery is counted in ipath_stats.sps_txeparity and announced with
+ * an info print, and we give up once IPATH_MAX_PARITY_ATTEMPTS is
+ * reached (the give-up message is printed only when the limit is hit).
+ *
+ * Returns 1 after asking ipath_disarm_senderrbufs() to rewrite and
+ * disarm the flagged send buffers, 0 if we gave up.
+ */
+static int ipath_pe_txe_recover(struct ipath_devdata *dd)
+{
+	if (!ipath_unordered_wc()) {
+		int attempts = ++ipath_stats.sps_txeparity;
+
+		if (attempts == IPATH_MAX_PARITY_ATTEMPTS)
+			ipath_dev_err(dd,
+				      "Too many attempts to recover from "
+				      "TXE parity, giving up\n");
+		if (attempts >= IPATH_MAX_PARITY_ATTEMPTS)
+			return 0;
+		dev_info(&dd->pcidev->dev,
+			 "Recovering from TXE PIO parity error\n");
+	} else {
+		/* likely a false report: stay quiet and don't count it */
+		ipath_dbg("Recovering from TXE PIO parity error\n");
+	}
+
+	ipath_disarm_senderrbufs(dd, 1);
+	return 1;
+}
+
/**
* ipath_init_iba6120_funcs - set up the chip-specific function pointers
* @dd: the infinipath device
+#include <linux/io.h>
#include "ipath_verbs.h"
#include "ipath_common.h"
+/*
+ * ipath_clrpiobuf - rewrite a PIO buffer, to clear a parity error
+ * @dd: the infinipath device
+ * @pnum: PIO buffer index; indexes below dd->ipath_piobcnt2k select a
+ *        2K buffer, all others select a 4K buffer
+ *
+ * This routine should only be called when in freeze mode, and the
+ * buffer should be canceled afterwards.
+ *
+ * The buffer is reached through an __iomem pointer, so every store goes
+ * through writel().  Plain assignments through a non-volatile pointer
+ * may be merged or dropped by the compiler (the initial count store
+ * would be a dead store), and sparse rejects dereferencing __iomem
+ * pointers directly.
+ */
+static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+{
+	u32 __iomem *pbuf;
+	u32 dwcnt; /* dword count to write */
+	u32 i;
+
+	if (pnum < dd->ipath_piobcnt2k) {
+		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+			dd->ipath_palign);
+		dwcnt = dd->ipath_piosize2k >> 2;
+	} else {
+		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+		dwcnt = dd->ipath_piosize4k >> 2;
+	}
+	dev_info(&dd->pcidev->dev,
+		"Rewrite PIO buffer %u, to recover from parity error\n",
+		pnum);
+
+	/* no flush required, since already in freeze */
+	writel(dwcnt + 1, pbuf);
+	/*
+	 * Same store sequence as the original loop: dwcnt - 1 zero dwords,
+	 * starting at the first dword.  A counted loop is used so that a
+	 * zero dwcnt cannot wrap around and run off the end of the buffer.
+	 * NOTE(review): the first zero lands on the dword that just
+	 * received the count -- confirm against the chip documentation
+	 * that this is intended.
+	 */
+	for (i = 1; i < dwcnt; i++)
+		writel(0, pbuf++);
+}
+
/*
* Called when we might have an error that is specific to a particular
* PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ * If rewrite is true and bits are set in the sendbuffererror registers,
+ * we'll rewrite each flagged buffer first, to recover from parity errors.
*/
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
{
u32 piobcnt;
unsigned long sbuf[4];
}
for (i = 0; i < piobcnt; i++)
- if (test_bit(i, sbuf))
+ if (test_bit(i, sbuf)) {
+ if (rewrite)
+ ipath_clrpiobuf(dd, i);
ipath_disarm_piobufs(dd, i, 1);
+ }
dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
}
}
{
u64 ignore_this_time = 0;
- ipath_disarm_senderrbufs(dd);
+ ipath_disarm_senderrbufs(dd, 0);
if ((errs & E_SUM_LINK_PKTERRS) &&
!(dd->ipath_flags & IPATH_LINKACTIVE)) {
/*