X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fscsi%2Flibata-eh.c;h=70b623988a9f9f0ac8135a2e377e73220569cc0d;hb=47005f255ed126a4b48a1a2f63164fb1d83bcb0a;hp=7244caff13a05fb7ee2be6da7de4356610a53139;hpb=3d71b3b0b634b1a5ba8632fd9ec998e0e4aedfdb;p=linux-2.6 diff --git a/drivers/scsi/libata-eh.c b/drivers/scsi/libata-eh.c index 7244caff13..70b623988a 100644 --- a/drivers/scsi/libata-eh.c +++ b/drivers/scsi/libata-eh.c @@ -39,12 +39,14 @@ #include #include #include +#include "scsi_transport_api.h" #include #include "libata.h" static void __ata_port_freeze(struct ata_port *ap); +static void ata_eh_finish(struct ata_port *ap); static void ata_ering_record(struct ata_ering *ering, int is_io, unsigned int err_mask) @@ -236,12 +238,16 @@ void ata_scsi_error(struct Scsi_Host *host) ap->eh_context.i = ap->eh_info; memset(&ap->eh_info, 0, sizeof(ap->eh_info)); + ap->flags |= ATA_FLAG_EH_IN_PROGRESS; ap->flags &= ~ATA_FLAG_EH_PENDING; spin_unlock_irqrestore(hs_lock, flags); - /* invoke EH */ - ap->ops->error_handler(ap); + /* invoke EH. if unloading, just finish failed qcs */ + if (!(ap->flags & ATA_FLAG_UNLOADING)) + ap->ops->error_handler(ap); + else + ata_eh_finish(ap); /* Exception might have happend after ->error_handler * recovered the port but before this point. Repeat @@ -285,15 +291,60 @@ void ata_scsi_error(struct Scsi_Host *host) /* clean up */ spin_lock_irqsave(hs_lock, flags); - if (ap->flags & ATA_FLAG_RECOVERED) - ata_port_printk(ap, KERN_INFO, "EH complete\n"); - ap->flags &= ~ATA_FLAG_RECOVERED; + if (ap->flags & ATA_FLAG_LOADING) { + ap->flags &= ~ATA_FLAG_LOADING; + } else { + if (ap->flags & ATA_FLAG_SCSI_HOTPLUG) + queue_work(ata_aux_wq, &ap->hotplug_task); + if (ap->flags & ATA_FLAG_RECOVERED) + ata_port_printk(ap, KERN_INFO, "EH complete\n"); + } + + ap->flags &= ~(ATA_FLAG_SCSI_HOTPLUG | ATA_FLAG_RECOVERED); + + /* tell wait_eh that we're done */ + ap->flags &= ~ATA_FLAG_EH_IN_PROGRESS; + wake_up_all(&ap->eh_wait_q); spin_unlock_irqrestore(hs_lock, flags); DPRINTK("EXIT\n"); } +/** + * ata_port_wait_eh - Wait for the currently pending EH to complete + * @ap: Port to wait EH for + * + * Wait until the currently pending EH is complete. + * + * LOCKING: + * Kernel thread context (may sleep). + */ +void ata_port_wait_eh(struct ata_port *ap) +{ + unsigned long flags; + DEFINE_WAIT(wait); + + retry: + spin_lock_irqsave(&ap->host_set->lock, flags); + + while (ap->flags & (ATA_FLAG_EH_PENDING | ATA_FLAG_EH_IN_PROGRESS)) { + prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_irqrestore(&ap->host_set->lock, flags); + schedule(); + spin_lock_irqsave(&ap->host_set->lock, flags); + } + finish_wait(&ap->eh_wait_q, &wait); + + spin_unlock_irqrestore(&ap->host_set->lock, flags); + + /* make sure SCSI EH is complete */ + if (scsi_host_in_recovery(ap->host)) { + msleep(10); + goto retry; + } +} + /** * ata_qc_timeout - Handle timeout of queued command * @qc: Command that timed out @@ -432,7 +483,7 @@ void ata_port_schedule_eh(struct ata_port *ap) WARN_ON(!ap->ops->error_handler); ap->flags |= ATA_FLAG_EH_PENDING; - ata_schedule_scsi_eh(ap->host); + scsi_schedule_eh(ap->host); DPRINTK("port EH scheduled\n"); } @@ -627,9 +678,63 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc) __ata_eh_qc_complete(qc); } +/** + * ata_eh_detach_dev - detach ATA device + * @dev: ATA device to detach + * + * Detach @dev. + * + * LOCKING: + * None. + */ +static void ata_eh_detach_dev(struct ata_device *dev) +{ + struct ata_port *ap = dev->ap; + unsigned long flags; + + ata_dev_disable(dev); + + spin_lock_irqsave(&ap->host_set->lock, flags); + + dev->flags &= ~ATA_DFLAG_DETACH; + + if (ata_scsi_offline_dev(dev)) { + dev->flags |= ATA_DFLAG_DETACHED; + ap->flags |= ATA_FLAG_SCSI_HOTPLUG; + } + + spin_unlock_irqrestore(&ap->host_set->lock, flags); +} + +static void ata_eh_clear_action(struct ata_device *dev, + struct ata_eh_info *ehi, unsigned int action) +{ + int i; + + if (!dev) { + ehi->action &= ~action; + for (i = 0; i < ATA_MAX_DEVICES; i++) + ehi->dev_action[i] &= ~action; + } else { + /* doesn't make sense for port-wide EH actions */ + WARN_ON(!(action & ATA_EH_PERDEV_MASK)); + + /* break ehi->action into ehi->dev_action */ + if (ehi->action & action) { + for (i = 0; i < ATA_MAX_DEVICES; i++) + ehi->dev_action[i] |= ehi->action & action; + ehi->action &= ~action; + } + + /* turn off the specified per-dev action */ + ehi->dev_action[dev->devno] &= ~action; + } +} + /** * ata_eh_about_to_do - about to perform eh_action * @ap: target ATA port + * @dev: target ATA dev for per-dev action (can be NULL) * @action: action about to be performed * * Called just before performing EH actions to clear related bits @@ -639,16 +744,35 @@ void ata_eh_qc_retry(struct ata_queued_cmd *qc) * LOCKING: * None. */ -static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action) +static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, + unsigned int action) { unsigned long flags; spin_lock_irqsave(&ap->host_set->lock, flags); - ap->eh_info.action &= ~action; + ata_eh_clear_action(dev, &ap->eh_info, action); ap->flags |= ATA_FLAG_RECOVERED; spin_unlock_irqrestore(&ap->host_set->lock, flags); } +/** + * ata_eh_done - EH action complete + * @ap: target ATA port + * @dev: target ATA dev for per-dev action (can be NULL) + * @action: action just completed + * + * Called right after performing EH actions to clear related bits + * in @ap->eh_context. + * + * LOCKING: + * None. + */ +static void ata_eh_done(struct ata_port *ap, struct ata_device *dev, + unsigned int action) +{ + ata_eh_clear_action(dev, &ap->eh_context.i, action); +} + /** * ata_err_string - convert err_mask to descriptive string * @err_mask: error mask to convert to string @@ -865,10 +989,8 @@ static void ata_eh_analyze_serror(struct ata_port *ap) err_mask |= AC_ERR_SYSTEM; action |= ATA_EH_SOFTRESET; } - if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) { - err_mask |= AC_ERR_ATA_BUS; - action |= ATA_EH_HARDRESET; - } + if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) + ata_ehi_hotplugged(&ehc->i); ehc->i.err_mask |= err_mask; ehc->i.action |= action; @@ -1169,9 +1291,6 @@ static void ata_eh_autopsy(struct ata_port *ap) /* inherit upper level err_mask */ qc->err_mask |= ehc->i.err_mask; - if (qc->err_mask & AC_ERR_TIMEOUT) - action |= ATA_EH_SOFTRESET; - /* analyze TF */ action |= ata_eh_analyze_tf(qc, &qc->result_tf); @@ -1197,13 +1316,25 @@ static void ata_eh_autopsy(struct ata_port *ap) is_io = 1; } - /* speed down iff command was in progress */ - if (failed_dev) + /* enforce default EH actions */ + if (ap->flags & ATA_FLAG_FROZEN || + all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) + action |= ATA_EH_SOFTRESET; + else if (all_err_mask) + action |= ATA_EH_REVALIDATE; + + /* if we have offending qcs and the associated failed device */ + if (failed_dev) { + /* speed down */ action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask); - if (all_err_mask) - action |= ATA_EH_REVALIDATE; + /* perform per-dev EH action only on the offending device */ + ehc->i.dev_action[failed_dev->devno] |= + action & ATA_EH_PERDEV_MASK; + action &= ~ATA_EH_PERDEV_MASK; + } + /* record autopsy result */ ehc->i.dev = failed_dev; ehc->i.action = action; @@ -1277,35 +1408,150 @@ static void ata_eh_report(struct ata_port *ap) } } -static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset, +static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset, + unsigned int *classes) +{ + int i, rc; + + for (i = 0; i < ATA_MAX_DEVICES; i++) + classes[i] = ATA_DEV_UNKNOWN; + + rc = reset(ap, classes); + if (rc) + return rc; + + /* If any class isn't ATA_DEV_UNKNOWN, consider classification + * is complete and convert all ATA_DEV_UNKNOWN to + * ATA_DEV_NONE. + */ + for (i = 0; i < ATA_MAX_DEVICES; i++) + if (classes[i] != ATA_DEV_UNKNOWN) + break; + + if (i < ATA_MAX_DEVICES) + for (i = 0; i < ATA_MAX_DEVICES; i++) + if (classes[i] == ATA_DEV_UNKNOWN) + classes[i] = ATA_DEV_NONE; + + return 0; +} + +static int ata_eh_followup_srst_needed(int rc, int classify, + const unsigned int *classes) +{ + if (rc == -EAGAIN) + return 1; + if (rc != 0) + return 0; + if (classify && classes[0] == ATA_DEV_UNKNOWN) + return 1; + return 0; +} + +static int ata_eh_reset(struct ata_port *ap, int classify, + ata_prereset_fn_t prereset, ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { struct ata_eh_context *ehc = &ap->eh_context; - unsigned int classes[ATA_MAX_DEVICES]; + unsigned int *classes = ehc->classes; int tries = ATA_EH_RESET_TRIES; + int verbose = !(ap->flags & ATA_FLAG_LOADING); + unsigned int action; ata_reset_fn_t reset; - int rc; + int i, did_followup_srst, rc; + /* Determine which reset to use and record in ehc->i.action. + * prereset() may examine and modify it. + */ + action = ehc->i.action; + ehc->i.action &= ~ATA_EH_RESET_MASK; if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && - !(ehc->i.action & ATA_EH_HARDRESET)))) - reset = softreset; + !(action & ATA_EH_HARDRESET)))) + ehc->i.action |= ATA_EH_SOFTRESET; else + ehc->i.action |= ATA_EH_HARDRESET; + + if (prereset) { + rc = prereset(ap); + if (rc) { + ata_port_printk(ap, KERN_ERR, + "prereset failed (errno=%d)\n", rc); + return rc; + } + } + + /* prereset() might have modified ehc->i.action */ + if (ehc->i.action & ATA_EH_HARDRESET) reset = hardreset; + else if (ehc->i.action & ATA_EH_SOFTRESET) + reset = softreset; + else { + /* prereset told us not to reset, bang classes and return */ + for (i = 0; i < ATA_MAX_DEVICES; i++) + classes[i] = ATA_DEV_NONE; + return 0; + } + + /* did prereset() screw up? if so, fix up to avoid oopsing */ + if (!reset) { + ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested " + "invalid reset type\n"); + if (softreset) + reset = softreset; + else + reset = hardreset; + } retry: - ata_port_printk(ap, KERN_INFO, "%s resetting port\n", - reset == softreset ? "soft" : "hard"); + /* shut up during boot probing */ + if (verbose) + ata_port_printk(ap, KERN_INFO, "%s resetting port\n", + reset == softreset ? "soft" : "hard"); /* reset */ - ata_eh_about_to_do(ap, ATA_EH_RESET_MASK); + ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); ehc->i.flags |= ATA_EHI_DID_RESET; rc = ata_do_reset(ap, reset, classes); + did_followup_srst = 0; + if (reset == hardreset && + ata_eh_followup_srst_needed(rc, classify, classes)) { + /* okay, let's do follow-up softreset */ + did_followup_srst = 1; + reset = softreset; + + if (!reset) { + ata_port_printk(ap, KERN_ERR, + "follow-up softreset required " + "but no softreset avaliable\n"); + return -EINVAL; + } + + ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); + rc = ata_do_reset(ap, reset, classes); + + if (rc == 0 && classify && + classes[0] == ATA_DEV_UNKNOWN) { + ata_port_printk(ap, KERN_ERR, + "classification failed\n"); + return -EINVAL; + } + } + if (rc && --tries) { + const char *type; + + if (reset == softreset) { + if (did_followup_srst) + type = "follow-up soft"; + else + type = "soft"; + } else + type = "hard"; + ata_port_printk(ap, KERN_WARNING, - "%sreset failed, retrying in 5 secs\n", - reset == softreset ? "soft" : "hard"); + "%sreset failed, retrying in 5 secs\n", type); ssleep(5); if (reset == hardreset) @@ -1316,44 +1562,72 @@ static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset, } if (rc == 0) { + /* After the reset, the device state is PIO 0 and the + * controller state is undefined. Record the mode. + */ + for (i = 0; i < ATA_MAX_DEVICES; i++) + ap->device[i].pio_mode = XFER_PIO_0; + if (postreset) postreset(ap, classes); /* reset successful, schedule revalidation */ - ehc->i.dev = NULL; - ehc->i.action &= ~ATA_EH_RESET_MASK; + ata_eh_done(ap, NULL, ATA_EH_RESET_MASK); ehc->i.action |= ATA_EH_REVALIDATE; } return rc; } -static int ata_eh_revalidate(struct ata_port *ap, - struct ata_device **r_failed_dev) +static int ata_eh_revalidate_and_attach(struct ata_port *ap, + struct ata_device **r_failed_dev) { struct ata_eh_context *ehc = &ap->eh_context; struct ata_device *dev; + unsigned long flags; int i, rc = 0; DPRINTK("ENTER\n"); for (i = 0; i < ATA_MAX_DEVICES; i++) { + unsigned int action; + dev = &ap->device[i]; + action = ehc->i.action | ehc->i.dev_action[dev->devno]; - if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) && - (!ehc->i.dev || ehc->i.dev == dev)) { + if (action & ATA_EH_REVALIDATE && ata_dev_enabled(dev)) { if (ata_port_offline(ap)) { rc = -EIO; break; } - ata_eh_about_to_do(ap, ATA_EH_REVALIDATE); + ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); rc = ata_dev_revalidate(dev, ehc->i.flags & ATA_EHI_DID_RESET); if (rc) break; - ehc->i.action &= ~ATA_EH_REVALIDATE; + ata_eh_done(ap, dev, ATA_EH_REVALIDATE); + + /* schedule the scsi_rescan_device() here */ + queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); + } else if (dev->class == ATA_DEV_UNKNOWN && + ehc->tries[dev->devno] && + ata_class_enabled(ehc->classes[dev->devno])) { + dev->class = ehc->classes[dev->devno]; + + rc = ata_dev_read_id(dev, &dev->class, 1, dev->id); + if (rc == 0) + rc = ata_dev_configure(dev, 1); + + if (rc) { + dev->class = ATA_DEV_UNKNOWN; + break; + } + + spin_lock_irqsave(&ap->host_set->lock, flags); + ap->flags |= ATA_FLAG_SCSI_HOTPLUG; + spin_unlock_irqrestore(&ap->host_set->lock, flags); } } @@ -1374,18 +1648,50 @@ static int ata_port_nr_enabled(struct ata_port *ap) return cnt; } +static int ata_port_nr_vacant(struct ata_port *ap) +{ + int i, cnt = 0; + + for (i = 0; i < ATA_MAX_DEVICES; i++) + if (ap->device[i].class == ATA_DEV_UNKNOWN) + cnt++; + return cnt; +} + +static int ata_eh_skip_recovery(struct ata_port *ap) +{ + struct ata_eh_context *ehc = &ap->eh_context; + int i; + + if (ap->flags & ATA_FLAG_FROZEN || ata_port_nr_enabled(ap)) + return 0; + + /* skip if class codes for all vacant slots are ATA_DEV_NONE */ + for (i = 0; i < ATA_MAX_DEVICES; i++) { + struct ata_device *dev = &ap->device[i]; + + if (dev->class == ATA_DEV_UNKNOWN && + ehc->classes[dev->devno] != ATA_DEV_NONE) + return 0; + } + + return 1; +} + /** * ata_eh_recover - recover host port after error * @ap: host port to recover + * @prereset: prereset method (can be NULL) * @softreset: softreset method (can be NULL) * @hardreset: hardreset method (can be NULL) * @postreset: postreset method (can be NULL) * * This is the alpha and omega, eum and yang, heart and soul of * libata exception handling. On entry, actions required to - * recover each devices are recorded in eh_context. This - * function executes all the operations with appropriate retrials - * and fallbacks to resurrect failed devices. + * recover the port and hotplug requests are recorded in + * eh_context. This function executes all the operations with + * appropriate retrials and fallbacks to resurrect failed + * devices, detach goners and greet newcomers. * * LOCKING: * Kernel thread context (may sleep). @@ -1393,8 +1699,8 @@ static int ata_port_nr_enabled(struct ata_port *ap) * RETURNS: * 0 on success, -errno on failure. */ -static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, - ata_reset_fn_t hardreset, +static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, + ata_reset_fn_t softreset, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { struct ata_eh_context *ehc = &ap->eh_context; @@ -1408,21 +1714,42 @@ static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, dev = &ap->device[i]; ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; + + /* process hotplug request */ + if (dev->flags & ATA_DFLAG_DETACH) + ata_eh_detach_dev(dev); + + if (!ata_dev_enabled(dev) && + ((ehc->i.probe_mask & (1 << dev->devno)) && + !(ehc->did_probe_mask & (1 << dev->devno)))) { + ata_eh_detach_dev(dev); + ata_dev_init(dev); + ehc->did_probe_mask |= (1 << dev->devno); + ehc->i.action |= ATA_EH_SOFTRESET; + } } retry: down_xfermask = 0; rc = 0; + /* if UNLOADING, finish immediately */ + if (ap->flags & ATA_FLAG_UNLOADING) + goto out; + /* skip EH if possible. */ - if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN)) + if (ata_eh_skip_recovery(ap)) ehc->i.action = 0; + for (i = 0; i < ATA_MAX_DEVICES; i++) + ehc->classes[i] = ATA_DEV_UNKNOWN; + /* reset */ if (ehc->i.action & ATA_EH_RESET_MASK) { ata_eh_freeze_port(ap); - rc = ata_eh_reset(ap, softreset, hardreset, postreset); + rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset, + softreset, hardreset, postreset); if (rc) { ata_port_printk(ap, KERN_ERR, "reset failed, giving up\n"); @@ -1432,8 +1759,8 @@ static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, ata_eh_thaw_port(ap); } - /* revalidate existing devices */ - rc = ata_eh_revalidate(ap, &dev); + /* revalidate existing devices and attach new ones */ + rc = ata_eh_revalidate_and_attach(ap, &dev); if (rc) goto dev_fail; @@ -1451,6 +1778,8 @@ static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, dev_fail: switch (rc) { case -ENODEV: + /* device missing, schedule probing */ + ehc->i.probe_mask |= (1 << dev->devno); case -EINVAL: ehc->tries[dev->devno] = 0; break; @@ -1463,15 +1792,31 @@ static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset, ehc->tries[dev->devno] = 0; } - /* disable device if it has used up all its chances */ - if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) + if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { + /* disable device if it has used up all its chances */ ata_dev_disable(dev); - /* soft didn't work? be haaaaard */ - if (ehc->i.flags & ATA_EHI_DID_RESET) - ehc->i.action |= ATA_EH_HARDRESET; - else - ehc->i.action |= ATA_EH_SOFTRESET; + /* detach if offline */ + if (ata_port_offline(ap)) + ata_eh_detach_dev(dev); + + /* probe if requested */ + if ((ehc->i.probe_mask & (1 << dev->devno)) && + !(ehc->did_probe_mask & (1 << dev->devno))) { + ata_eh_detach_dev(dev); + ata_dev_init(dev); + + ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; + ehc->did_probe_mask |= (1 << dev->devno); + ehc->i.action |= ATA_EH_SOFTRESET; + } + } else { + /* soft didn't work? be haaaaard */ + if (ehc->i.flags & ATA_EHI_DID_RESET) + ehc->i.action |= ATA_EH_HARDRESET; + else + ehc->i.action |= ATA_EH_SOFTRESET; + } if (ata_port_nr_enabled(ap)) { ata_port_printk(ap, KERN_WARNING, "failed to recover some " @@ -1539,6 +1884,7 @@ static void ata_eh_finish(struct ata_port *ap) /** * ata_do_eh - do standard error handling * @ap: host port to handle error for + * @prereset: prereset method (can be NULL) * @softreset: softreset method (can be NULL) * @hardreset: hardreset method (can be NULL) * @postreset: postreset method (can be NULL) @@ -1548,11 +1894,15 @@ static void ata_eh_finish(struct ata_port *ap) * LOCKING: * Kernel thread context (may sleep). */ -void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset, - ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) +void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, + ata_reset_fn_t softreset, ata_reset_fn_t hardreset, + ata_postreset_fn_t postreset) { - ata_eh_autopsy(ap); - ata_eh_report(ap); - ata_eh_recover(ap, softreset, hardreset, postreset); + if (!(ap->flags & ATA_FLAG_LOADING)) { + ata_eh_autopsy(ap); + ata_eh_report(ap); + } + + ata_eh_recover(ap, prereset, softreset, hardreset, postreset); ata_eh_finish(ap); }