From: Hannes Reinecke Date: Wed, 8 Mar 2006 11:58:16 +0000 (+0100) Subject: [SCSI] aic79xx: Update error recovery X-Git-Tag: v2.6.17-rc1~1129^2~4^2~63 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6902f41610d631f74cfca7c61eac7b0950dd8990;p=linux-2.6 [SCSI] aic79xx: Update error recovery This patch updates the error recovery. Routines for TARGET RESET and ABORT COMMAND are split up as the logic is quite dissimilar. Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index cb5f7af606..00d48a0fb7 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -373,7 +373,7 @@ static void ahd_linux_handle_scsi_status(struct ahd_softc *, struct scb *); static void ahd_linux_queue_cmd_complete(struct ahd_softc *ahd, struct scsi_cmnd *cmd); -static int ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag); +static int ahd_linux_queue_abort_cmd(struct scsi_cmnd *cmd); static void ahd_linux_initialize_scsi_bus(struct ahd_softc *ahd); static u_int ahd_linux_user_tagdepth(struct ahd_softc *ahd, struct ahd_devinfo *devinfo); @@ -648,10 +648,9 @@ static int ahd_linux_abort(struct scsi_cmnd *cmd) { int error; + + error = ahd_linux_queue_abort_cmd(cmd); - error = ahd_linux_queue_recovery_cmd(cmd, SCB_ABORT); - if (error != 0) - printf("aic79xx_abort returns 0x%x\n", error); return error; } @@ -661,12 +660,97 @@ ahd_linux_abort(struct scsi_cmnd *cmd) static int ahd_linux_dev_reset(struct scsi_cmnd *cmd) { - int error; + struct ahd_softc *ahd; + struct ahd_linux_device *dev; + struct scb *reset_scb; + u_int cdb_byte; + int retval = SUCCESS; + int paused; + int wait; + struct ahd_initiator_tinfo *tinfo; + struct ahd_tmode_tstate *tstate; + unsigned long flags; + DECLARE_COMPLETION(done); - error = ahd_linux_queue_recovery_cmd(cmd, SCB_DEVICE_RESET); - if (error != 0) - printf("aic79xx_dev_reset returns 0x%x\n", error); - return error; + reset_scb = NULL; + paused = FALSE; + wait = FALSE; + ahd = *(struct ahd_softc **)cmd->device->host->hostdata; + + scmd_printk(KERN_INFO, cmd, + "Attempting to queue a TARGET RESET message:"); + + printf("CDB:"); + for (cdb_byte = 0; cdb_byte < cmd->cmd_len; cdb_byte++) + printf(" 0x%x", cmd->cmnd[cdb_byte]); + printf("\n"); + + /* + * Determine if we currently own this command. + */ + dev = scsi_transport_device_data(cmd->device); + + if (dev == NULL) { + /* + * No target device for this command exists, + * so we must not still own the command. + */ + scmd_printk(KERN_INFO, cmd, "Is not an active device\n"); + return SUCCESS; + } + + /* + * Generate us a new SCB + */ + reset_scb = ahd_get_scb(ahd, AHD_NEVER_COL_IDX); + if (!reset_scb) { + scmd_printk(KERN_INFO, cmd, "No SCB available\n"); + return FAILED; + } + + tinfo = ahd_fetch_transinfo(ahd, 'A', ahd->our_id, + cmd->device->id, &tstate); + reset_scb->io_ctx = cmd; + reset_scb->platform_data->dev = dev; + reset_scb->sg_count = 0; + ahd_set_residual(reset_scb, 0); + ahd_set_sense_residual(reset_scb, 0); + reset_scb->platform_data->xfer_len = 0; + reset_scb->hscb->control = 0; + reset_scb->hscb->scsiid = BUILD_SCSIID(ahd,cmd); + reset_scb->hscb->lun = cmd->device->lun; + reset_scb->hscb->cdb_len = 0; + reset_scb->hscb->task_management = SIU_TASKMGMT_LUN_RESET; + reset_scb->flags |= SCB_DEVICE_RESET|SCB_RECOVERY_SCB|SCB_ACTIVE; + if ((tinfo->curr.ppr_options & MSG_EXT_PPR_IU_REQ) != 0) { + reset_scb->flags |= SCB_PACKETIZED; + } else { + reset_scb->hscb->control |= MK_MESSAGE; + } + dev->openings--; + dev->active++; + dev->commands_issued++; + + ahd_lock(ahd, &flags); + + LIST_INSERT_HEAD(&ahd->pending_scbs, reset_scb, pending_links); + ahd_queue_scb(ahd, reset_scb); + + ahd->platform_data->eh_done = &done; + ahd_unlock(ahd, &flags); + + printf("%s: Device reset code sleeping\n", ahd_name(ahd)); + if (!wait_for_completion_timeout(&done, 5 * HZ)) { + ahd_lock(ahd, &flags); + ahd->platform_data->eh_done = NULL; + ahd_unlock(ahd, &flags); + printf("%s: Device reset timer expired (active %d)\n", + ahd_name(ahd), dev->active); + retval = FAILED; + } + printf("%s: Device reset returning 0x%x\n", ahd_name(ahd), retval); + + return (retval); } /* @@ -1891,72 +1975,108 @@ ahd_linux_handle_scsi_status(struct ahd_softc *ahd, static void ahd_linux_queue_cmd_complete(struct ahd_softc *ahd, struct scsi_cmnd *cmd) { + int status; + int new_status = DID_OK; + int do_fallback = 0; + int scsi_status; + /* * Map CAM error codes into Linux Error codes. We * avoid the conversion so that the DV code has the * full error information available when making * state change decisions. */ - { - uint32_t status; - u_int new_status; - - status = ahd_cmd_get_transaction_status(cmd); - switch (status) { - case CAM_REQ_INPROG: - case CAM_REQ_CMP: - case CAM_SCSI_STATUS_ERROR: - new_status = DID_OK; - break; - case CAM_REQ_ABORTED: - new_status = DID_ABORT; - break; - case CAM_BUSY: - new_status = DID_BUS_BUSY; - break; - case CAM_REQ_INVALID: - case CAM_PATH_INVALID: - new_status = DID_BAD_TARGET; - break; - case CAM_SEL_TIMEOUT: - new_status = DID_NO_CONNECT; - break; - case CAM_SCSI_BUS_RESET: - case CAM_BDR_SENT: - new_status = DID_RESET; - break; - case CAM_UNCOR_PARITY: - new_status = DID_PARITY; - break; - case CAM_CMD_TIMEOUT: - new_status = DID_TIME_OUT; - break; - case CAM_UA_ABORT: - case CAM_REQ_CMP_ERR: - case CAM_AUTOSENSE_FAIL: - case CAM_NO_HBA: - case CAM_DATA_RUN_ERR: - case CAM_UNEXP_BUSFREE: - case CAM_SEQUENCE_FAIL: - case CAM_CCB_LEN_ERR: - case CAM_PROVIDE_FAIL: - case CAM_REQ_TERMIO: - case CAM_UNREC_HBA_ERROR: - case CAM_REQ_TOO_BIG: - new_status = DID_ERROR; - break; - case CAM_REQUEUE_REQ: - new_status = DID_REQUEUE; + + status = ahd_cmd_get_transaction_status(cmd); + switch (status) { + case CAM_REQ_INPROG: + case CAM_REQ_CMP: + new_status = DID_OK; + break; + case CAM_AUTOSENSE_FAIL: + new_status = DID_ERROR; + /* Fallthrough */ + case CAM_SCSI_STATUS_ERROR: + scsi_status = ahd_cmd_get_scsi_status(cmd); + + switch(scsi_status) { + case SCSI_STATUS_CMD_TERMINATED: + case SCSI_STATUS_CHECK_COND: + if ((cmd->result >> 24) != DRIVER_SENSE) { + do_fallback = 1; + } else { + struct scsi_sense_data *sense; + + sense = (struct scsi_sense_data *) + &cmd->sense_buffer; + if (sense->extra_len >= 5 && + (sense->add_sense_code == 0x47 + || sense->add_sense_code == 0x48)) + do_fallback = 1; + } break; default: - /* We should never get here */ - new_status = DID_ERROR; break; } + break; + case CAM_REQ_ABORTED: + new_status = DID_ABORT; + break; + case CAM_BUSY: + new_status = DID_BUS_BUSY; + break; + case CAM_REQ_INVALID: + case CAM_PATH_INVALID: + new_status = DID_BAD_TARGET; + break; + case CAM_SEL_TIMEOUT: + new_status = DID_NO_CONNECT; + break; + case CAM_SCSI_BUS_RESET: + case CAM_BDR_SENT: + new_status = DID_RESET; + break; + case CAM_UNCOR_PARITY: + new_status = DID_PARITY; + do_fallback = 1; + break; + case CAM_CMD_TIMEOUT: + new_status = DID_TIME_OUT; + do_fallback = 1; + break; + case CAM_REQ_CMP_ERR: + case CAM_UNEXP_BUSFREE: + case CAM_DATA_RUN_ERR: + new_status = DID_ERROR; + do_fallback = 1; + break; + case CAM_UA_ABORT: + case CAM_NO_HBA: + case CAM_SEQUENCE_FAIL: + case CAM_CCB_LEN_ERR: + case CAM_PROVIDE_FAIL: + case CAM_REQ_TERMIO: + case CAM_UNREC_HBA_ERROR: + case CAM_REQ_TOO_BIG: + new_status = DID_ERROR; + break; + case CAM_REQUEUE_REQ: + new_status = DID_REQUEUE; + break; + default: + /* We should never get here */ + new_status = DID_ERROR; + break; + } - ahd_cmd_set_transaction_status(cmd, new_status); + if (do_fallback) { + printf("%s: device overrun (status %x) on %d:%d:%d\n", + ahd_name(ahd), status, cmd->device->channel, + cmd->device->id, cmd->device->lun); } + ahd_cmd_set_transaction_status(cmd, new_status); + cmd->scsi_done(cmd); } @@ -1973,7 +2093,7 @@ ahd_release_simq(struct ahd_softc *ahd) } static int -ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) +ahd_linux_queue_abort_cmd(struct scsi_cmnd *cmd) { struct ahd_softc *ahd; struct ahd_linux_device *dev; @@ -1988,7 +2108,6 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) int paused; int wait; int disconnected; - int found; ahd_mode_state saved_modes; unsigned long flags; @@ -1998,8 +2117,7 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) ahd = *(struct ahd_softc **)cmd->device->host->hostdata; scmd_printk(KERN_INFO, cmd, - "Attempting to queue a%s message:", - flag == SCB_ABORT ? "n ABORT" : " TARGET RESET"); + "Attempting to queue an ABORT message:"); printf("CDB:"); for (cdb_byte = 0; cdb_byte < cmd->cmd_len; cdb_byte++) @@ -2035,19 +2153,6 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) break; } - if (pending_scb == NULL && flag == SCB_DEVICE_RESET) { - - /* Any SCB for this device will do for a target reset */ - LIST_FOREACH(pending_scb, &ahd->pending_scbs, pending_links) { - if (ahd_match_scb(ahd, pending_scb, - scmd_id(cmd), - scmd_channel(cmd) + 'A', - CAM_LUN_WILDCARD, - SCB_LIST_NULL, ROLE_INITIATOR)) - break; - } - } - if (pending_scb == NULL) { scmd_printk(KERN_INFO, cmd, "Command not found\n"); goto no_cmd; @@ -2081,25 +2186,17 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) ahd_dump_card_state(ahd); disconnected = TRUE; - if (flag == SCB_ABORT) { - if (ahd_search_qinfifo(ahd, cmd->device->id, - cmd->device->channel + 'A', - cmd->device->lun, - pending_scb->hscb->tag, - ROLE_INITIATOR, CAM_REQ_ABORTED, - SEARCH_COMPLETE) > 0) { - printf("%s:%d:%d:%d: Cmd aborted from QINFIFO\n", - ahd_name(ahd), cmd->device->channel, - cmd->device->id, cmd->device->lun); - retval = SUCCESS; - goto done; - } - } else if (ahd_search_qinfifo(ahd, cmd->device->id, - cmd->device->channel + 'A', - cmd->device->lun, pending_scb->hscb->tag, - ROLE_INITIATOR, /*status*/0, - SEARCH_COUNT) > 0) { - disconnected = FALSE; + if (ahd_search_qinfifo(ahd, cmd->device->id, + cmd->device->channel + 'A', + cmd->device->lun, + pending_scb->hscb->tag, + ROLE_INITIATOR, CAM_REQ_ABORTED, + SEARCH_COMPLETE) > 0) { + printf("%s:%d:%d:%d: Cmd aborted from QINFIFO\n", + ahd_name(ahd), cmd->device->channel, + cmd->device->id, cmd->device->lun); + retval = SUCCESS; + goto done; } saved_modes = ahd_save_modes(ahd); @@ -2107,17 +2204,12 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) last_phase = ahd_inb(ahd, LASTPHASE); saved_scbptr = ahd_get_scbptr(ahd); active_scbptr = saved_scbptr; - if (disconnected && ((last_phase != P_BUSFREE) || - (ahd_inb(ahd, SEQ_FLAGS) & NOT_IDENTIFIED) == 0)) { + if (disconnected && (ahd_inb(ahd, SEQ_FLAGS) & NOT_IDENTIFIED) == 0) { struct scb *bus_scb; bus_scb = ahd_lookup_scb(ahd, active_scbptr); if (bus_scb == pending_scb) disconnected = FALSE; - else if (flag != SCB_ABORT - && ahd_inb(ahd, SAVED_SCSIID) == pending_scb->hscb->scsiid - && ahd_inb(ahd, SAVED_LUN) == SCB_GET_LUN(pending_scb)) - disconnected = FALSE; } /* @@ -2126,41 +2218,26 @@ ahd_linux_queue_recovery_cmd(struct scsi_cmnd *cmd, scb_flag flag) * bus or is in the disconnected state. */ saved_scsiid = ahd_inb(ahd, SAVED_SCSIID); - if (SCB_GET_TAG(pending_scb) == active_scbptr - || (flag == SCB_DEVICE_RESET - && SCSIID_TARGET(ahd, saved_scsiid) == scmd_id(cmd))) { + if (last_phase != P_BUSFREE + && SCB_GET_TAG(pending_scb) == active_scbptr) { /* * We're active on the bus, so assert ATN * and hope that the target responds. */ pending_scb = ahd_lookup_scb(ahd, active_scbptr); - pending_scb->flags |= SCB_RECOVERY_SCB|SCB_DEVICE_RESET; + pending_scb->flags |= SCB_RECOVERY_SCB|SCB_ABORT; ahd_outb(ahd, MSG_OUT, HOST_MSG); ahd_outb(ahd, SCSISIGO, last_phase|ATNO); - scmd_printk(KERN_INFO, cmd, "BDR message in message buffer\n"); + scmd_printk(KERN_INFO, cmd, "Device is active, asserting ATN\n"); wait = TRUE; - } else if (last_phase != P_BUSFREE - && ahd_inb(ahd, SCSIPHASE) == 0) { - /* - * SCB is not identified, there - * is no pending REQ, and the sequencer - * has not seen a busfree. Looks like - * a stuck connection waiting to - * go busfree. Reset the bus. - */ - found = ahd_reset_channel(ahd, cmd->device->channel + 'A', - /*Initiate Reset*/TRUE); - printf("%s: Issued Channel %c Bus Reset. " - "%d SCBs aborted\n", ahd_name(ahd), - cmd->device->channel + 'A', found); } else if (disconnected) { /* * Actually re-queue this SCB in an attempt * to select the device before it reconnects. */ - pending_scb->flags |= SCB_RECOVERY_SCB|flag; + pending_scb->flags |= SCB_RECOVERY_SCB|SCB_ABORT; ahd_set_scbptr(ahd, SCB_GET_TAG(pending_scb)); pending_scb->hscb->cdb_len = 0; pending_scb->hscb->task_attribute = 0;