From: Ju, Seokmann Date: Thu, 27 Apr 2006 09:33:06 +0000 (-0700) Subject: [SCSI] megaraid_{mm,mbox}: fix a bug in reset handler X-Git-Tag: v2.6.17-rc4~4^2~6 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c005fb4fb2d23ba29ad21dee5042b2f8451ca8ba;p=linux-2.6 [SCSI] megaraid_{mm,mbox}: fix a bug in reset handler When an abort fails, the driver's reset handler gets called. In the reset handler, the driver calls the 'scsi_done()' callback for the same SCSI command packet (struct scsi_cmnd) multiple times if there are multiple SCSI command packets in the pend_list. Moreover, if there is an entry in the pend_list with an IOCTL packet associated, the driver returns it to the wrong free_list so that, in turn, the driver could end up with 'NULL pointer dereference..' during I/O command building with an incorrect resource. Also, the patch contains several minor/cosmetic changes besides this. Signed-off-by: Seokmann Ju Signed-off-by: Andrew Morton Signed-off-by: James Bottomley --- diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid index 09f6300eda..c173806c91 100644 --- a/Documentation/scsi/ChangeLog.megaraid +++ b/Documentation/scsi/ChangeLog.megaraid @@ -1,3 +1,28 @@ +Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju +Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module) +Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) + +1. Fixed a bug in megaraid_reset_handler(). + Customer reported "Unable to handle kernel NULL pointer dereference + at virtual address 00000000" when system goes to reset condition + for some reason. It happened randomly. + Root Cause: in the megaraid_reset_handler(), there is possibility not + returning pending packets in the pend_list if there are multiple + pending packets. + Fix: Made the change in the driver so that it will return all packets + in the pend_list. + +2. Added change request. + As found in the following URL, rmb() only didn't help the + problem. 
I had to increase the loop counter to 0xFFFFFF. (6 F's) + http://marc.theaimsgroup.com/?l=linux-scsi&m=110971060502497&w=2 + + I attached a patch for your reference, too. + Could you check and get this fix in your driver? + + Best Regards, + Jun'ichi Nomura + Release Date : Fri Nov 11 12:27:22 EST 2005 - Seokmann Ju Current Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) Older Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module) diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index c11e5ce686..bec1424eda 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -10,7 +10,7 @@ * 2 of the License, or (at your option) any later version. * * FILE : megaraid_mbox.c - * Version : v2.20.4.7 (Nov 14 2005) + * Version : v2.20.4.8 (Apr 11 2006) * * Authors: * Atul Mukker @@ -2278,6 +2278,7 @@ megaraid_mbox_dpc(unsigned long devp) unsigned long flags; uint8_t c; int status; + uioc_t *kioc; if (!adapter) return; @@ -2320,6 +2321,9 @@ megaraid_mbox_dpc(unsigned long devp) // remove from local clist list_del_init(&scb->list); + kioc = (uioc_t *)scb->gp; + kioc->status = 0; + megaraid_mbox_mm_done(adapter, scb); continue; @@ -2636,6 +2640,7 @@ megaraid_reset_handler(struct scsi_cmnd *scp) int recovery_window; int recovering; int i; + uioc_t *kioc; adapter = SCP2ADAPTER(scp); raid_dev = ADAP2RAIDDEV(adapter); @@ -2655,32 +2660,51 @@ megaraid_reset_handler(struct scsi_cmnd *scp) // Also, reset all the commands currently owned by the driver spin_lock_irqsave(PENDING_LIST_LOCK(adapter), flags); list_for_each_entry_safe(scb, tmp, &adapter->pend_list, list) { - list_del_init(&scb->list); // from pending list - con_log(CL_ANN, (KERN_WARNING - "megaraid: %ld:%d[%d:%d], reset from pending list\n", - scp->serial_number, scb->sno, - scb->dev_channel, scb->dev_target)); + if (scb->sno >= MBOX_MAX_SCSI_CMDS) { + con_log(CL_ANN, (KERN_WARNING + "megaraid: IOCTL packet with %d[%d:%d] being reset\n", + 
scb->sno, scb->dev_channel, scb->dev_target)); - scp->result = (DID_RESET << 16); - scp->scsi_done(scp); + scb->status = -1; - megaraid_dealloc_scb(adapter, scb); + kioc = (uioc_t *)scb->gp; + kioc->status = -EFAULT; + + megaraid_mbox_mm_done(adapter, scb); + } else { + if (scb->scp == scp) { // Found command + con_log(CL_ANN, (KERN_WARNING + "megaraid: %ld:%d[%d:%d], reset from pending list\n", + scp->serial_number, scb->sno, + scb->dev_channel, scb->dev_target)); + } else { + con_log(CL_ANN, (KERN_WARNING + "megaraid: IO packet with %d[%d:%d] being reset\n", + scb->sno, scb->dev_channel, scb->dev_target)); + } + + scb->scp->result = (DID_RESET << 16); + scb->scp->scsi_done(scb->scp); + + megaraid_dealloc_scb(adapter, scb); + } } spin_unlock_irqrestore(PENDING_LIST_LOCK(adapter), flags); if (adapter->outstanding_cmds) { con_log(CL_ANN, (KERN_NOTICE "megaraid: %d outstanding commands. Max wait %d sec\n", - adapter->outstanding_cmds, MBOX_RESET_WAIT)); + adapter->outstanding_cmds, + (MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT))); } recovery_window = MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT; recovering = adapter->outstanding_cmds; - for (i = 0; i < recovery_window && adapter->outstanding_cmds; i++) { + for (i = 0; i < recovery_window; i++) { megaraid_ack_sequence(adapter); @@ -2689,12 +2713,11 @@ megaraid_reset_handler(struct scsi_cmnd *scp) con_log(CL_ANN, ( "megaraid mbox: Wait for %d commands to complete:%d\n", adapter->outstanding_cmds, - MBOX_RESET_WAIT - i)); + (MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT) - i)); } // bailout if no recovery happended in reset time - if ((i == MBOX_RESET_WAIT) && - (recovering == adapter->outstanding_cmds)) { + if (adapter->outstanding_cmds == 0) { break; } @@ -2918,12 +2941,13 @@ mbox_post_sync_cmd_fast(adapter_t *adapter, uint8_t raw_mbox[]) wmb(); WRINDOOR(raid_dev, raid_dev->mbox_dma | 0x1); - for (i = 0; i < 0xFFFFF; i++) { + for (i = 0; i < MBOX_SYNC_WAIT_CNT; i++) { if (mbox->numstatus != 0xFF) break; rmb(); + 
udelay(MBOX_SYNC_DELAY_200); } - if (i == 0xFFFFF) { + if (i == MBOX_SYNC_WAIT_CNT) { // We may need to re-calibrate the counter con_log(CL_ANN, (KERN_CRIT "megaraid: fast sync command timed out\n")); @@ -3475,7 +3499,7 @@ megaraid_cmm_register(adapter_t *adapter) adp.drvr_data = (unsigned long)adapter; adp.pdev = adapter->pdev; adp.issue_uioc = megaraid_mbox_mm_handler; - adp.timeout = 300; + adp.timeout = MBOX_RESET_WAIT + MBOX_RESET_EXT_WAIT; adp.max_kioc = MBOX_MAX_USER_CMDS; if ((rval = mraid_mm_register_adp(&adp)) != 0) { @@ -3702,7 +3726,6 @@ megaraid_mbox_mm_done(adapter_t *adapter, scb_t *scb) unsigned long flags; kioc = (uioc_t *)scb->gp; - kioc->status = 0; mbox64 = (mbox64_t *)(unsigned long)kioc->cmdbuf; mbox64->mbox32.status = scb->status; raw_mbox = (uint8_t *)&mbox64->mbox32; diff --git a/drivers/scsi/megaraid/megaraid_mbox.h b/drivers/scsi/megaraid/megaraid_mbox.h index 882fb1a0b5..868fb0ec93 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.h +++ b/drivers/scsi/megaraid/megaraid_mbox.h @@ -21,8 +21,8 @@ #include "megaraid_ioctl.h" -#define MEGARAID_VERSION "2.20.4.7" -#define MEGARAID_EXT_VERSION "(Release Date: Mon Nov 14 12:27:22 EST 2005)" +#define MEGARAID_VERSION "2.20.4.8" +#define MEGARAID_EXT_VERSION "(Release Date: Mon Apr 11 12:27:22 EST 2006)" /* @@ -100,6 +100,9 @@ #define MBOX_BUSY_WAIT 10 // max usec to wait for busy mailbox #define MBOX_RESET_WAIT 180 // wait these many seconds in reset #define MBOX_RESET_EXT_WAIT 120 // extended wait reset +#define MBOX_SYNC_WAIT_CNT 0xFFFF // wait loop index for synchronous mode + +#define MBOX_SYNC_DELAY_200 200 // 200 micro-seconds /* * maximum transfer that can happen through the firmware commands issued