Mercurial > illumos > illumos-gate
changeset 11692:8795ed2df6db
6898573 Watch dog support for PMCS
6922946 potential null dereference in pmcs_smp_release()
6922947 potential null dereference in pmcs_remove_phy_from_iport()
6923443 Inserted disk not shown in BUI but is listed as part of pool from 'zpool status'
author | Jesse Butler <Jesse.Butler@Sun.COM> |
---|---|
date | Thu, 18 Feb 2010 12:52:39 -0700 |
parents | 60b9aa653af2 |
children | 0a223da9570a |
files | usr/src/cmd/mdb/common/modules/pmcs/pmcs.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_nvram.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs.h usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_param.h usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h |
diffstat | 8 files changed, 357 insertions(+), 113 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/cmd/mdb/common/modules/pmcs/pmcs.c	Thu Feb 18 12:37:30 2010 -0700
+++ b/usr/src/cmd/mdb/common/modules/pmcs/pmcs.c	Thu Feb 18 12:52:39 2010 -0700
@@ -2363,6 +2363,9 @@
 	case STATE_DEAD:
 		state_str = "Dead";
 		break;
+	case STATE_IN_RESET:
+		state_str = "In Reset";
+		break;
 	}
 
 	mdb_printf("%16p %9s %4d %1d %1d 0x%08x 0x%04x 0x%04x %16p\n", addr,
@@ -2537,6 +2540,9 @@
 	case STATE_DEAD:
 		state_str = "Dead";
 		break;
+	case STATE_IN_RESET:
+		state_str = "In Reset";
+		break;
 	}
 
 	mdb_printf("%16p %9s %4d %1d %1d 0x%08x 0x%04x 0x%04x %16p\n", addr,
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c Thu Feb 18 12:37:30 2010 -0700 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c Thu Feb 18 12:52:39 2010 -0700 @@ -895,6 +895,7 @@ if (pmcs_soft_reset(pwp, B_FALSE)) { goto failure; } + pwp->last_reset_reason = PMCS_LAST_RST_ATTACH; } /* @@ -1375,6 +1376,7 @@ * Reset chip */ (void) pmcs_soft_reset(pwp, B_FALSE); + pwp->last_reset_reason = PMCS_LAST_RST_DETACH; } /* @@ -1611,6 +1613,7 @@ /* Stop MPI & Reset chip (no need to re-initialize) */ (void) pmcs_stop_mpi(pwp); (void) pmcs_soft_reset(pwp, B_TRUE); + pwp->last_reset_reason = PMCS_LAST_RST_QUIESCE; return (DDI_SUCCESS); } @@ -1836,6 +1839,74 @@ return (0); } +static void +pmcs_check_forward_progress(pmcs_hw_t *pwp) +{ + uint32_t cur_iqci; + uint32_t cur_msgu_tick; + uint32_t cur_iop_tick; + int i; + + mutex_enter(&pwp->lock); + + if (pwp->state == STATE_IN_RESET) { + mutex_exit(&pwp->lock); + return; + } + + /* Ensure that inbound work is getting picked up */ + for (i = 0; i < PMCS_NIQ; i++) { + cur_iqci = pmcs_rd_iqci(pwp, i); + if (cur_iqci == pwp->shadow_iqpi[i]) { + pwp->last_iqci[i] = cur_iqci; + continue; + } + if (cur_iqci == pwp->last_iqci[i]) { + pmcs_prt(pwp, PMCS_PRT_WARN, NULL, NULL, + "Inbound Queue stall detected, issuing reset"); + goto hot_reset; + } + pwp->last_iqci[i] = cur_iqci; + } + + /* Check heartbeat on both the MSGU and IOP */ + cur_msgu_tick = pmcs_rd_gst_tbl(pwp, PMCS_GST_MSGU_TICK); + if (cur_msgu_tick == pwp->last_msgu_tick) { + pmcs_prt(pwp, PMCS_PRT_WARN, NULL, NULL, + "Stall detected on MSGU, issuing reset"); + goto hot_reset; + } + pwp->last_msgu_tick = cur_msgu_tick; + + cur_iop_tick = pmcs_rd_gst_tbl(pwp, PMCS_GST_IOP_TICK); + if (cur_iop_tick == pwp->last_iop_tick) { + pmcs_prt(pwp, PMCS_PRT_WARN, NULL, NULL, + "Stall detected on IOP, issuing reset"); + goto hot_reset; + } + pwp->last_iop_tick = cur_iop_tick; + + mutex_exit(&pwp->lock); + return; + +hot_reset: + pwp->state = STATE_DEAD; + /* + * We've 
detected a stall. Attempt to recover service via hot + * reset. In case of failure, pmcs_hot_reset() will handle the + * failure and issue any required FM notifications. + * See pmcs_subr.c for more details. + */ + if (pmcs_hot_reset(pwp)) { + pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, + "%s: hot reset failure", __func__); + } else { + pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, + "%s: hot reset complete", __func__); + pwp->last_reset_reason = PMCS_LAST_RST_STALL; + } + mutex_exit(&pwp->lock); +} static void pmcs_check_commands(pmcs_hw_t *pwp) @@ -2018,6 +2089,14 @@ pwp->config_changed); /* + * Check forward progress on the chip + */ + if (++pwp->watchdog_count == PMCS_FWD_PROG_TRIGGER) { + pwp->watchdog_count = 0; + pmcs_check_forward_progress(pwp); + } + + /* * Check to see if we need to kick discovery off again */ mutex_enter(&pwp->config_lock); @@ -2032,7 +2111,6 @@ mutex_exit(&pwp->config_lock); mutex_enter(&pwp->lock); - if (pwp->state != STATE_RUNNING) { mutex_exit(&pwp->lock); return; @@ -2047,7 +2125,9 @@ } pwp->wdhandle = timeout(pmcs_watchdog, pwp, drv_usectohz(PMCS_WATCH_INTERVAL)); + mutex_exit(&pwp->lock); + pmcs_check_commands(pwp); pmcs_handle_dead_phys(pwp); } @@ -2570,18 +2650,24 @@ pmcs_fatal_handler(pmcs_hw_t *pwp) { pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, "Fatal Interrupt caught"); + mutex_enter(&pwp->lock); pwp->state = STATE_DEAD; - pmcs_register_dump_int(pwp); - pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff); - pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff); + + /* + * Attempt a hot reset. In case of failure, pmcs_hot_reset() will + * handle the failure and issue any required FM notifications. + * See pmcs_subr.c for more details. 
+ */ + if (pmcs_hot_reset(pwp)) { + pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, + "%s: hot reset failure", __func__); + } else { + pmcs_prt(pwp, PMCS_PRT_ERR, NULL, NULL, + "%s: hot reset complete", __func__); + pwp->last_reset_reason = PMCS_LAST_RST_FATAL_ERROR; + } mutex_exit(&pwp->lock); - pmcs_fm_ereport(pwp, DDI_FM_DEVICE_NO_RESPONSE); - ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST); - -#ifdef DEBUG - cmn_err(CE_PANIC, "PMCS Fatal Firmware Error"); -#endif } /*
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_nvram.c	Thu Feb 18 12:37:30 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_nvram.c	Thu Feb 18 12:52:39 2010 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  *
  *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -215,6 +215,7 @@
 	} else {
 		pmcs_prt(pwp, PMCS_PRT_WARN, NULL, NULL,
 		    "%s: Firmware successfully upgraded.", __func__);
+		pwp->last_reset_reason = PMCS_LAST_RST_FW_UPGRADE;
 	}
 	return (0);
 }
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c Thu Feb 18 12:37:30 2010 -0700 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c Thu Feb 18 12:52:39 2010 -0700 @@ -225,7 +225,7 @@ /* * See if there's already a target softstate. If not, allocate one. */ - tgt = pmcs_get_target(iport, tgt_port); + tgt = pmcs_get_target(iport, tgt_port, B_TRUE); if (tgt == NULL) { goto tgt_init_fail; @@ -435,7 +435,6 @@ pwp = ITRAN2PMC(tran); mutex_enter(&pwp->lock); mutex_enter(&target->statlock); - ASSERT(target->phy); phyp = target->phy; pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, phyp, target, @@ -1239,7 +1238,7 @@ mutex_enter(&pwp->lock); /* Retrieve softstate using unit-address */ - tgt = pmcs_get_target(iport, tgt_port); + tgt = pmcs_get_target(iport, tgt_port, B_TRUE); if (tgt == NULL) { pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s: tgt softstate not found", __func__); @@ -1410,8 +1409,10 @@ (void *)tgt, tgt->target_num); pwp->targets[tgt->target_num] = NULL; tgt->target_num = PMCS_INVALID_TARGET_NUM; - tgt->phy->target = NULL; - tgt->phy = NULL; + if (tgt->phy) { + tgt->phy->target = NULL; + tgt->phy = NULL; + } pmcs_destroy_target(tgt); } else { mutex_exit(&tgt->statlock); @@ -1442,7 +1443,7 @@ } pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s called", __func__); - pwp->blocked = 1; + pwp->quiesced = pwp->blocked = 1; while (totactive) { totactive = 0; for (target = 0; target < pwp->max_dev; target++) { @@ -1502,7 +1503,7 @@ return (-1); } pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, "%s called", __func__); - pwp->blocked = 0; + pwp->blocked = pwp->quiesced = 0; mutex_exit(&pwp->lock); /* @@ -2161,7 +2162,6 @@ } out: - pmcs_pwork(pwp, pwrk); pmcs_dma_unload(pwp, sp); /* @@ -2175,6 +2175,17 @@ mutex_enter(&xp->statlock); /* + * If the device no longer has a PHY pointer, clear the PHY pointer + * from the work structure before we free it. Otherwise, pmcs_pwork + * may decrement the ref_count on a PHY that's been freed. 
+ */ + if (xp->phy == NULL) { + pwrk->phy = NULL; + } + + pmcs_pwork(pwp, pwrk); + + /* * If the device is gone, we only put this command on the completion * queue if the work structure is not marked dead. If it's marked * dead, it will already have been put there. @@ -2185,7 +2196,7 @@ mutex_enter(&xp->aqlock); STAILQ_REMOVE(&xp->aq, sp, pmcs_cmd, cmd_next); mutex_exit(&xp->aqlock); - pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, xp, + pmcs_prt(pwp, PMCS_PRT_DEBUG3, pptr, xp, "%s: Removing cmd 0x%p (htag 0x%x) from aq", __func__, (void *)sp, sp->cmd_tag); mutex_enter(&pwp->cq_lock); @@ -2220,7 +2231,7 @@ #else mutex_enter(&xp->aqlock); #endif - pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, xp, + pmcs_prt(pwp, PMCS_PRT_DEBUG3, pptr, xp, "%s: Removing cmd 0x%p (htag 0x%x) from aq", __func__, (void *)sp, sp->cmd_tag); STAILQ_REMOVE(&xp->aq, sp, pmcs_cmd, cmd_next); @@ -2633,7 +2644,6 @@ } out: - pmcs_pwork(pwp, pwrk); pmcs_dma_unload(pwp, sp); /* @@ -2647,13 +2657,24 @@ mutex_enter(&xp->statlock); xp->tagmap &= ~(1 << sp->cmd_satltag); + /* + * If the device no longer has a PHY pointer, clear the PHY pointer + * from the work structure before we free it. Otherwise, pmcs_pwork + * may decrement the ref_count on a PHY that's been freed. + */ + if (xp->phy == NULL) { + pwrk->phy = NULL; + } + + pmcs_pwork(pwp, pwrk); + if (xp->dev_gone) { mutex_exit(&xp->statlock); if (!dead) { mutex_enter(&xp->aqlock); STAILQ_REMOVE(&xp->aq, sp, pmcs_cmd, cmd_next); mutex_exit(&xp->aqlock); - pmcs_prt(pwp, PMCS_PRT_DEBUG1, pptr, xp, + pmcs_prt(pwp, PMCS_PRT_DEBUG3, pptr, xp, "%s: Removing cmd 0x%p (htag 0x%x) from aq", __func__, (void *)sp, sp->cmd_tag); mutex_enter(&pwp->cq_lock); @@ -3053,10 +3074,11 @@ /* * Return the existing target softstate if there is one. If there is, * the PHY is locked as well and that lock must be freed by the caller - * after the target/PHY linkage is established. + * after the target/PHY linkage is established. 
If there isn't one, and + * alloc_tgt is TRUE, then allocate one. */ pmcs_xscsi_t * -pmcs_get_target(pmcs_iport_t *iport, char *tgt_port) +pmcs_get_target(pmcs_iport_t *iport, char *tgt_port, boolean_t alloc_tgt) { pmcs_hw_t *pwp = iport->pwp; pmcs_phy_t *phyp; @@ -3109,6 +3131,14 @@ } /* + * If this was just a lookup (i.e. alloc_tgt is false), return now. + */ + if (alloc_tgt == B_FALSE) { + pmcs_unlock_phy(phyp); + return (NULL); + } + + /* * Allocate the new softstate */ wwn = pmcs_barray2wwn(phyp->sas_address);
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c Thu Feb 18 12:37:30 2010 -0700 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c Thu Feb 18 12:52:39 2010 -0700 @@ -1423,6 +1423,11 @@ pwp->blocked = 1; /* + * Clear our softstate copies of the MSGU and IOP heartbeats. + */ + pwp->last_msgu_tick = pwp->last_iop_tick = 0; + + /* * Step 1 */ s2 = pmcs_rd_msgunit(pwp, PMCS_MSGU_SCRATCH2); @@ -1653,6 +1658,29 @@ return (-1); } + /* Clear the firmware log */ + if (pwp->fwlogp) { + bzero(pwp->fwlogp, PMCS_FWLOG_SIZE); + } + + /* Reset our queue indices and entries */ + bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi)); + bzero(pwp->last_iqci, sizeof (pwp->last_iqci)); + for (i = 0; i < PMCS_NIQ; i++) { + if (pwp->iqp[i]) { + bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth); + pmcs_wr_iqpi(pwp, i, 0); + pmcs_wr_iqci(pwp, i, 0); + } + } + for (i = 0; i < PMCS_NOQ; i++) { + if (pwp->oqp[i]) { + bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth); + pmcs_wr_oqpi(pwp, i, 0); + pmcs_wr_oqci(pwp, i, 0); + } + + } if (pwp->state == STATE_DEAD || pwp->state == STATE_UNPROBING || pwp->state == STATE_PROBING || pwp->locks_initted == 0) { @@ -1673,18 +1701,8 @@ ASSERT(pwp->locks_initted != 0); /* - * Clean up various soft state. 
- */ - bzero(pwp->ports, sizeof (pwp->ports)); - - pmcs_free_all_phys(pwp, pwp->root_phys); - - for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) { - pmcs_lock_phy(pptr); - pmcs_clear_phy(pwp, pptr); - pmcs_unlock_phy(pptr); - } - + * Flush the target queues and clear each target's PHY + */ if (pwp->targets) { for (i = 0; i < pwp->max_dev; i++) { pmcs_xscsi_t *xp = pwp->targets[i]; @@ -1692,66 +1710,24 @@ if (xp == NULL) { continue; } + mutex_enter(&xp->statlock); - pmcs_clear_xp(pwp, xp); + pmcs_flush_target_queues(pwp, xp, PMCS_TGT_ALL_QUEUES); + xp->phy = NULL; mutex_exit(&xp->statlock); } } - bzero(pwp->shadow_iqpi, sizeof (pwp->shadow_iqpi)); - for (i = 0; i < PMCS_NIQ; i++) { - if (pwp->iqp[i]) { - bzero(pwp->iqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth); - pmcs_wr_iqpi(pwp, i, 0); - pmcs_wr_iqci(pwp, i, 0); - } - } - for (i = 0; i < PMCS_NOQ; i++) { - if (pwp->oqp[i]) { - bzero(pwp->oqp[i], PMCS_QENTRY_SIZE * pwp->ioq_depth); - pmcs_wr_oqpi(pwp, i, 0); - pmcs_wr_oqci(pwp, i, 0); - } - - } - if (pwp->fwlogp) { - bzero(pwp->fwlogp, PMCS_FWLOG_SIZE); - } - STAILQ_INIT(&pwp->wf); - bzero(pwp->work, sizeof (pmcwork_t) * pwp->max_cmd); - for (i = 0; i < pwp->max_cmd - 1; i++) { - pmcwork_t *pwrk = &pwp->work[i]; - STAILQ_INSERT_TAIL(&pwp->wf, pwrk, next); - } - - /* - * Clear out any leftover commands sitting in the work list - */ - for (i = 0; i < pwp->max_cmd; i++) { - pmcwork_t *pwrk = &pwp->work[i]; - mutex_enter(&pwrk->lock); - if (pwrk->state == PMCS_WORK_STATE_ONCHIP) { - switch (PMCS_TAG_TYPE(pwrk->htag)) { - case PMCS_TAG_TYPE_WAIT: - mutex_exit(&pwrk->lock); - break; - case PMCS_TAG_TYPE_CBACK: - case PMCS_TAG_TYPE_NONE: - pmcs_pwork(pwp, pwrk); - break; - default: - break; - } - } else if (pwrk->state == PMCS_WORK_STATE_IOCOMPQ) { - pwrk->dead = 1; - mutex_exit(&pwrk->lock); - } else { - /* - * The other states of NIL, READY and INTR - * should not be visible outside of a lock being held. 
- */ - pmcs_pwork(pwp, pwrk); - } + /* + * Zero out the ports list, free non root phys, clear root phys + */ + bzero(pwp->ports, sizeof (pwp->ports)); + pmcs_free_all_phys(pwp, pwp->root_phys); + for (pptr = pwp->root_phys; pptr; pptr = pptr->sibling) { + pmcs_lock_phy(pptr); + pmcs_clear_phy(pwp, pptr); + pptr->target = NULL; + pmcs_unlock_phy(pptr); } /* @@ -1760,7 +1736,6 @@ pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, pwp->intr_mask); pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff); - pwp->blocked = 0; pwp->mpi_table_setup = 0; mutex_exit(&pwp->lock); @@ -1782,7 +1757,6 @@ } mutex_enter(&pwp->lock); - pwp->blocked = 0; SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES); mutex_exit(&pwp->lock); @@ -1806,6 +1780,80 @@ return (-1); } + +/* + * Perform a 'hot' reset, which will soft reset the chip and + * restore the state back to pre-reset context. Called with pwp + * lock held. + */ +int +pmcs_hot_reset(pmcs_hw_t *pwp) +{ + pmcs_iport_t *iport; + + ASSERT(mutex_owned(&pwp->lock)); + pwp->state = STATE_IN_RESET; + + /* + * For any iports on this HBA, report empty target sets and + * then tear them down. + */ + rw_enter(&pwp->iports_lock, RW_READER); + for (iport = list_head(&pwp->iports); iport != NULL; + iport = list_next(&pwp->iports, iport)) { + mutex_enter(&iport->lock); + (void) scsi_hba_tgtmap_set_begin(iport->iss_tgtmap); + (void) scsi_hba_tgtmap_set_end(iport->iss_tgtmap, 0); + pmcs_iport_teardown_phys(iport); + mutex_exit(&iport->lock); + } + rw_exit(&pwp->iports_lock); + + /* Grab a register dump, in the event that reset fails */ + pmcs_register_dump_int(pwp); + mutex_exit(&pwp->lock); + + /* Issue soft reset and clean up related softstate */ + if (pmcs_soft_reset(pwp, B_FALSE)) { + /* + * Disable interrupts, in case we got far enough along to + * enable them, then fire off ereport and service impact. 
+ */ + pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, + "%s: failed soft reset", __func__); + pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_MASK, 0xffffffff); + pmcs_wr_msgunit(pwp, PMCS_MSGU_OBDB_CLEAR, 0xffffffff); + pmcs_fm_ereport(pwp, DDI_FM_DEVICE_NO_RESPONSE); + ddi_fm_service_impact(pwp->dip, DDI_SERVICE_LOST); + mutex_enter(&pwp->lock); + pwp->state = STATE_DEAD; + return (DDI_FAILURE); + } + + mutex_enter(&pwp->lock); + pwp->state = STATE_RUNNING; + mutex_exit(&pwp->lock); + + /* + * Finally, restart the phys, which will bring the iports back + * up and eventually result in discovery running. + */ + if (pmcs_start_phys(pwp)) { + /* We should be up and running now, so retry */ + if (pmcs_start_phys(pwp)) { + /* Apparently unable to restart PHYs, fail */ + pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, + "%s: failed to restart PHYs after soft reset", + __func__); + mutex_enter(&pwp->lock); + return (DDI_FAILURE); + } + } + + mutex_enter(&pwp->lock); + return (DDI_SUCCESS); +} + /* * Reset a device or a logical unit. */ @@ -1961,7 +2009,9 @@ /* * Remove all phys from an iport's phymap and empty it's phylist. - * Called when a port has been reset by the host (see pmcs_intr.c). + * Called when a port has been reset by the host (see pmcs_intr.c) + * or prior to issuing a soft reset if we detect a stall on the chip + * (see pmcs_attach.c). 
*/ void pmcs_iport_teardown_phys(pmcs_iport_t *iport) @@ -1985,10 +2035,12 @@ /* Remove all phys from the phymap */ phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua); - while ((phynum = sas_phymap_phys_next(phys)) != -1) { - (void) sas_phymap_phy_rem(pwp->hss_phymap, phynum); - } - sas_phymap_phys_free(phys); + if (phys) { + while ((phynum = sas_phymap_phys_next(phys)) != -1) { + (void) sas_phymap_phy_rem(pwp->hss_phymap, phynum); + } + sas_phymap_phys_free(phys); + } } /* @@ -2020,6 +2072,7 @@ */ ASSERT(list_is_empty(&iport->phys)); phys = sas_phymap_ua2phys(pwp->hss_phymap, iport->ua); + ASSERT(phys != NULL); while ((phynum = sas_phymap_phys_next(phys)) != -1) { /* Grab the phy pointer from root_phys */ pptr = pwp->root_phys + phynum; @@ -2316,6 +2369,7 @@ { pmcs_phy_t *pptr; pmcs_phy_t *root_phy; + int phymap_active; DTRACE_PROBE2(pmcs__discover__entry, ulong_t, pwp->work_flags, boolean_t, pwp->config_changed); @@ -2335,6 +2389,7 @@ return; } + phymap_active = pwp->phymap_active; mutex_exit(&pwp->lock); /* @@ -2349,6 +2404,14 @@ SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER); return; } + if (pwp->num_iports != phymap_active) { + rw_exit(&pwp->iports_lock); + pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, NULL, NULL, + "%s: phymaps or iport maps not stable; retry discovery", + __func__); + SCHEDULE_WORK(pwp, PMCS_WORK_DISCOVER); + return; + } rw_exit(&pwp->iports_lock); mutex_enter(&pwp->config_lock); @@ -2491,6 +2554,9 @@ } pmcs_release_scratch(pwp); + if (!pwp->quiesced) { + pwp->blocked = 0; + } pwp->configuring = 0; mutex_exit(&pwp->config_lock); @@ -7772,8 +7838,8 @@ next_pptr = list_next(&iport->phys, pptr); mutex_enter(&pptr->phy_lock); pptr->iport = NULL; - pmcs_update_phy_pm_props(phyp, phyp->att_port_pm_tmp, - phyp->tgt_port_pm_tmp, B_FALSE); + pmcs_update_phy_pm_props(pptr, pptr->att_port_pm_tmp, + pptr->tgt_port_pm_tmp, B_FALSE); mutex_exit(&pptr->phy_lock); pmcs_rele_iport(iport); list_remove(&iport->phys, pptr); @@ -7867,8 +7933,6 @@ pmcs_smp_release(pmcs_iport_t 
*iport) { if (iport == NULL) { - pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_IPORT, NULL, NULL, - "%s: iport is NULL...", __func__); return; } @@ -8003,19 +8067,34 @@ scsi_tgtmap_tgt_type_t tgt_type, void **tgt_privp) { pmcs_iport_t *iport = (pmcs_iport_t *)tgtmap_priv; - - pmcs_prt(iport->pwp, PMCS_PRT_DEBUG_IPORT, NULL, NULL, - "%s: called for iport%d/%s(%d)", __func__, - ddi_get_instance(iport->dip), tgt_addr, tgt_type); + pmcs_hw_t *pwp = iport->pwp; + pmcs_xscsi_t *target; + + /* + * Look up the target. If there is one, and it doesn't have a PHY + * pointer, re-establish that linkage here. + */ + mutex_enter(&pwp->lock); + target = pmcs_get_target(iport, tgt_addr, B_FALSE); + mutex_exit(&pwp->lock); + + /* + * If we got a target, it will now have a PHY pointer and the PHY + * will point to the target. The PHY will be locked, so we'll need + * to unlock it. + */ + if (target) { + pmcs_unlock_phy(target->phy); + } /* * Update config_restart_time so we don't try to restart discovery * while enumeration is still in progress. */ - mutex_enter(&iport->pwp->config_lock); - iport->pwp->config_restart_time = ddi_get_lbolt() + + mutex_enter(&pwp->config_lock); + pwp->config_restart_time = ddi_get_lbolt() + drv_usectohz(PMCS_REDISCOVERY_DELAY); - mutex_exit(&iport->pwp->config_lock); + mutex_exit(&pwp->config_lock); } /* ARGSUSED */
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs.h Thu Feb 18 12:37:30 2010 -0700 +++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs.h Thu Feb 18 12:52:39 2010 -0700 @@ -292,9 +292,23 @@ STATE_PROBING, STATE_RUNNING, STATE_UNPROBING, + STATE_IN_RESET, STATE_DEAD } state; + /* + * Last reason for a soft reset + */ + enum pwp_last_reset_reason { + PMCS_LAST_RST_UNINIT, + PMCS_LAST_RST_ATTACH, + PMCS_LAST_RST_FW_UPGRADE, + PMCS_LAST_RST_FATAL_ERROR, + PMCS_LAST_RST_STALL, + PMCS_LAST_RST_QUIESCE, + PMCS_LAST_RST_DETACH + } last_reset_reason; + uint32_t fw_disable_update : 1, fw_force_update : 1, @@ -311,7 +325,8 @@ physpeed : 3, resource_limited : 1, configuring : 1, - ds_err_recovering : 1; + ds_err_recovering : 1, + quiesced : 1; /* * This HBA instance's iportmap and list of iport states. @@ -406,6 +421,7 @@ * memory and update the card as needed. */ uint32_t shadow_iqpi[PMCS_MAX_IQ]; + uint32_t last_iqci[PMCS_MAX_IQ]; uint32_t iqpi_offset[PMCS_MAX_IQ]; uint32_t *iqp[PMCS_MAX_IQ]; kmutex_t iqp_lock[PMCS_NIQ]; @@ -462,6 +478,12 @@ uint64_t flash_chunk_addr; /* + * Copies of the last read MSGU and IOP heartbeats. + */ + uint32_t last_msgu_tick; + uint32_t last_iop_tick; + + /* * Card information, some determined during MPI setup */ uint32_t fw; /* firmware version */ @@ -473,6 +495,12 @@ uint16_t max_dev; /* max number of devices supported */ uint16_t last_wq_dev; /* last dev whose wq was serviced */ + /* + * Counter for the number of times watchdog fires. We can use this + * to throttle events which fire off of the watchdog, such as the + * forward progress detection routine. + */ + uint8_t watchdog_count; /* * Interrupt Setup stuff.
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_param.h	Thu Feb 18 12:37:30 2010 -0700
+++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_param.h	Thu Feb 18 12:52:39 2010 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  *
  *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
 /*
@@ -78,6 +78,15 @@
 #define PMCS_WATCH_INTERVAL 250000 /* watchdog interval in us */
 
 /*
+ * Forward progress trigger. This is the number of times we run through
+ * watchdog before checking for forward progress. Implicitly bound to
+ * PMCS_WATCH_INTERVAL above. For example, with a PMCS_WATCH_INTERVAL of
+ * 250000, the watchdog will run every quarter second, so forward progress
+ * will be checked every 16th watchdog fire, or every four seconds.
+ */
+#define PMCS_FWD_PROG_TRIGGER 16
+
+/*
  * Inbound Queue definitions
  */
 #define PMCS_NIQ 9 /* 9 Inbound Queues */
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h Thu Feb 18 12:37:30 2010 -0700 +++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h Thu Feb 18 12:52:39 2010 -0700 @@ -247,6 +247,11 @@ int pmcs_soft_reset(pmcs_hw_t *, boolean_t); /* + * This is a hot reset which will attempt reconfiguration after reset. + */ +int pmcs_hot_reset(pmcs_hw_t *); + +/* * Some more reset functions */ int pmcs_reset_dev(pmcs_hw_t *, pmcs_phy_t *, uint64_t); @@ -340,7 +345,7 @@ void pmcs_worker(void *); pmcs_phy_t *pmcs_get_root_phy(pmcs_phy_t *); -pmcs_xscsi_t *pmcs_get_target(pmcs_iport_t *, char *); +pmcs_xscsi_t *pmcs_get_target(pmcs_iport_t *, char *, boolean_t); void pmcs_fatal_handler(pmcs_hw_t *);