From: Andi Kleen Date: Mon, 12 Sep 2005 16:49:24 +0000 (+0200) Subject: [PATCH] x86-64: Make lockless machine check record passing a bit more robust. X-Git-Tag: v2.6.14-rc1~68 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=673242c10d535bfe238d9d8e82ac93432d35b88e;p=linux-2.6 [PATCH] x86-64: Make lockless machine check record passing a bit more robust. One machine is constantly throwing NMI watchdog timeouts in mce_log This was one attempt to fix it. (AK: this doesn't actually fix the bug I'm seeing unfortunately, probably drop. I don't like it that the reader can spin forever now waiting for a writer) Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 8aa56736cd..87ea8fdd43 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -56,15 +56,19 @@ void mce_log(struct mce *mce) smp_wmb(); for (;;) { entry = rcu_dereference(mcelog.next); - /* When the buffer fills up discard new entries. Assume - that the earlier errors are the more interesting. */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, &mcelog.flags); - return; + for (;;) { + /* When the buffer fills up discard new entries. Assume + that the earlier errors are the more interesting. */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, &mcelog.flags); + return; + } + /* Old left over entry. Skip. */ + if (mcelog.entry[entry].finished) { + entry++; + continue; + } } - /* Old left over entry. Skip. */ - if (mcelog.entry[entry].finished) - continue; smp_rmb(); next = entry + 1; if (cmpxchg(&mcelog.next, entry, next) == entry) @@ -404,9 +408,15 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff } err = 0; - for (i = 0; i < next; i++) { - if (!mcelog.entry[i].finished) - continue; + for (i = 0; i < next; i++) { + unsigned long start = jiffies; + while (!mcelog.entry[i].finished) { + if (!time_before(jiffies, start + 2)) { + memset(mcelog.entry + i,0, sizeof(struct mce)); + continue; + } + cpu_relax(); + } smp_rmb(); err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); buf += sizeof(struct mce);