Mercurial > illumos > illumos-gate
changeset 11090:15bf339bd614
6897712 The phy counter iport->nphy shows incorrect number for an 8 phy port.
6897719 panic: Deadlock: cycle in blocking chain during expander reset test
6897251 Device state recovery is only possible when target is configured
6899829 pmcs_watchdog panic observed when running IO workload
author | dh142964 <David.Hollister@Sun.COM> |
---|---|
date | Wed, 18 Nov 2009 07:29:02 -0700 |
parents | 1d0f8cb05b08 |
children | d25092d8295b |
files | usr/src/uts/common/Makefile.files usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_intr.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_smhba.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_iomb.h usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h |
diffstat | 8 files changed, 933 insertions(+), 867 deletions(-) [+] |
line wrap: on
line diff
--- a/usr/src/uts/common/Makefile.files Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/Makefile.files Wed Nov 18 07:29:02 2009 -0700 @@ -900,7 +900,7 @@ SV_OBJS += sv.o -PMCS_OBJS += pmcs_attach.o pmcs_intr.o pmcs_nvram.o pmcs_sata.o \ +PMCS_OBJS += pmcs_attach.o pmcs_ds.o pmcs_intr.o pmcs_nvram.o pmcs_sata.o \ pmcs_scsa.o pmcs_smhba.o pmcs_subr.o pmcs_fwlog.o #
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_attach.c Wed Nov 18 07:29:02 2009 -0700 @@ -361,6 +361,8 @@ (void) scsi_wwn_to_wwnstr(pwp->sas_wwns[0], 1, init_port); } + + mutex_enter(&iport->lock); pmcs_smhba_add_iport_prop(iport, DATA_TYPE_STRING, SCSI_ADDR_PROP_INITIATOR_PORT, init_port); kmem_free(init_port, PMCS_MAX_UA_SIZE); @@ -368,6 +370,7 @@ /* Set up a 'num-phys' DDI property for the iport node */ pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS, &iport->nphy); + mutex_exit(&iport->lock); /* Create kstats for each of the phys in this port */ pmcs_create_phy_stats(iport); @@ -1929,13 +1932,13 @@ /* * No point attempting recovery if the device is gone */ - if (pwrk->xp->dev_gone) { + if (target->dev_gone) { mutex_exit(&target->statlock); pmcs_unlock_phy(phyp); pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, target, "%s: tgt(0x%p) is gone. Returning CMD_DEV_GONE " "for htag 0x%08x", __func__, - (void *)pwrk->xp, pwrk->htag); + (void *)target, pwrk->htag); mutex_enter(&pwrk->lock); if (!PMCS_COMMAND_DONE(pwrk)) { /* Complete this command here */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c Wed Nov 18 07:29:02 2009 -0700 @@ -0,0 +1,894 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * PM8001 device state recovery routines + */ + +#include <sys/scsi/adapters/pmcs/pmcs.h> + +/* + * SAS Topology Configuration + */ +static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, + pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name, int line, + char *reason_string); + +/* + * Get device state. Called with statlock and PHY lock held. + */ +static int +pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp, + uint8_t *ds) +{ + uint32_t htag, *ptr, msg[PMCS_MSG_SIZE]; + int result; + struct pmcwork *pwrk; + + pmcs_prt(pwp, PMCS_PRT_DEBUG3, phyp, xp, "%s: tgt(0x%p)", __func__, + (void *)xp); + + if (xp != NULL) { + ASSERT(mutex_owned(&xp->statlock)); + } + ASSERT(mutex_owned(&phyp->phy_lock)); + + pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp); + if (pwrk == NULL) { + pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__); + return (-1); + } + pwrk->arg = msg; + pwrk->dtype = phyp->dtype; + + if (phyp->valid_device_id == 0) { + pmcs_pwork(pwp, pwrk); + pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp, + "%s: Invalid DeviceID", __func__); + return (-1); + } + htag = pwrk->htag; + msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, + PMCIN_GET_DEVICE_STATE)); + msg[1] = LE_32(pwrk->htag); + msg[2] = LE_32(phyp->device_id); + + mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]); + ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER); + if (ptr == NULL) { + mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]); + pmcs_pwork(pwp, pwrk); + pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__); + return (-1); + } + COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE); + pwrk->state = PMCS_WORK_STATE_ONCHIP; + INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER); + + if (xp != NULL) { + mutex_exit(&xp->statlock); + } + pmcs_unlock_phy(phyp); + WAIT_FOR(pwrk, 1000, result); + pmcs_lock_phy(phyp); + pmcs_pwork(pwp, pwrk); + + if (xp != NULL) { + mutex_enter(&xp->statlock); + } + + if (result) { + pmcs_timed_out(pwp, htag, __func__); + pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp, + "%s: cmd timed out, returning", __func__); + return (-1); + } + if (LE_32(msg[2]) == 0) { + *ds = (uint8_t)(LE_32(msg[4])); + if (xp == NULL) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: retrieved_ds=0x%x", __func__, *ds); + } else if (*ds != xp->dev_state) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__, + *ds, xp->dev_state); + } + return (0); + } else { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: cmd failed Status(0x%x), returning ", __func__, + LE_32(msg[2])); + return (-1); + } +} + +/* + * Set device state. Called with target's statlock and PHY lock held. + */ +static int +pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_phy_t *phyp, pmcs_xscsi_t *xp, + uint8_t ds) +{ + uint32_t htag, *ptr, msg[PMCS_MSG_SIZE]; + int result; + uint8_t pds, nds; + struct pmcwork *pwrk; + + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: ds: 0x%x tgt: 0x%p phy: 0x%p", __func__, ds, (void *)xp, + (void *)phyp); + + if (phyp == NULL) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp, + "%s: PHY is NULL", __func__); + return (-1); + } + + pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp); + if (pwrk == NULL) { + pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__); + return (-1); + } + if (phyp->valid_device_id == 0) { + pmcs_pwork(pwp, pwrk); + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: Invalid DeviceID", __func__); + return (-1); + } + pwrk->arg = msg; + pwrk->dtype = phyp->dtype; + htag = pwrk->htag; + msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, + PMCIN_SET_DEVICE_STATE)); + msg[1] = LE_32(pwrk->htag); + msg[2] = LE_32(phyp->device_id); + msg[3] = LE_32(ds); + + mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]); + ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER); + if (ptr == NULL) { + mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]); + pmcs_pwork(pwp, pwrk); + pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__); + return (-1); + } + COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE); + pwrk->state = PMCS_WORK_STATE_ONCHIP; + INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER); + + if (xp != NULL) { + mutex_exit(&xp->statlock); + } + pmcs_unlock_phy(phyp); + WAIT_FOR(pwrk, 1000, result); + pmcs_lock_phy(phyp); + pmcs_pwork(pwp, pwrk); + if (xp != NULL) { + mutex_enter(&xp->statlock); + } + + if (result) { + pmcs_timed_out(pwp, htag, __func__); + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: cmd timed out, returning", __func__); + return (-1); + } + if (LE_32(msg[2]) == 0) { + pds = (uint8_t)(LE_32(msg[4]) >> 4); + nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f); + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: previous_ds=0x%x, new_ds=0x%x", __func__, pds, nds); + if (xp != NULL) { + xp->dev_state = nds; + } + return (0); + } else { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: cmd failed Status(0x%x), returning ", __func__, + LE_32(msg[2])); + return (-1); + } +} + +void +pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp) +{ + uint8_t ds, tgt_dev_state; + int rc; + pmcs_xscsi_t *tgt; + pmcs_phy_t *pptr, *pnext, *pchild; + + /* + * First time, check to see if we're already performing recovery + */ + if (phyp == NULL) { + mutex_enter(&pwp->lock); + if (pwp->ds_err_recovering) { + mutex_exit(&pwp->lock); + SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY); + return; + } + + pwp->ds_err_recovering = 1; + pptr = pwp->root_phys; + mutex_exit(&pwp->lock); + } else { + pptr = phyp; + } + + while (pptr) { + /* + * Since ds_err_recovering is set, we can be assured these + * PHYs won't disappear on us while we do this. + */ + pmcs_lock_phy(pptr); + pchild = pptr->children; + pnext = pptr->sibling; + pmcs_unlock_phy(pptr); + + if (pchild) { + pmcs_dev_state_recovery(pwp, pchild); + } + + tgt = NULL; + pmcs_lock_phy(pptr); + + if (pptr->dead) { + goto next_phy; + } + + tgt = pptr->target; + + if (tgt != NULL) { + mutex_enter(&tgt->statlock); + if (tgt->recover_wait == 0) { + goto next_phy; + } + tgt_dev_state = tgt->dev_state; + } else { + tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE; + } + + if (pptr->prev_recovery) { + if (ddi_get_lbolt() - pptr->prev_recovery < + drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) { + pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt, + "%s: DS recovery on PHY %s " + "re-invoked too soon. Skipping...", + __func__, pptr->path); + goto next_phy; + } + } + pptr->prev_recovery = ddi_get_lbolt(); + + /* + * Step 1: Put the device into the IN_RECOVERY state + */ + rc = pmcs_get_dev_state(pwp, pptr, tgt, &ds); + if (rc != 0) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: pmcs_get_dev_state on PHY %s " + "failed (rc=%d)", + __func__, pptr->path, rc); + + pmcs_handle_ds_recovery_error(pptr, tgt, pwp, + __func__, __LINE__, "pmcs_get_dev_state"); + + goto next_phy; + } + + if ((tgt_dev_state == ds) && + (ds == PMCS_DEVICE_STATE_IN_RECOVERY)) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, + "%s: Target 0x%p already IN_RECOVERY", __func__, + (void *)tgt); + } else { + if (tgt != NULL) { + tgt->dev_state = ds; + } + tgt_dev_state = ds; + ds = PMCS_DEVICE_STATE_IN_RECOVERY; + rc = pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt); + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, + "%s: pmcs_send_err_recovery_cmd " + "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)", + __func__, rc, (void *)tgt, ds, tgt_dev_state); + + if (rc) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: pmcs_send_err_recovery_cmd to PHY %s " + "failed (rc=%d)", + __func__, pptr->path, rc); + + pmcs_handle_ds_recovery_error(pptr, tgt, pwp, + __func__, __LINE__, + "pmcs_send_err_recovery_cmd"); + + goto next_phy; + } + } + + /* + * Step 2: Perform a hard reset on the PHY + */ + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, + "%s: Issue HARD_RESET to PHY %s", __func__, pptr->path); + /* + * Must release statlock here because pmcs_reset_phy will + * drop and reacquire the PHY lock. + */ + if (tgt != NULL) { + mutex_exit(&tgt->statlock); + } + rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET); + if (tgt != NULL) { + mutex_enter(&tgt->statlock); + } + if (rc) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: HARD_RESET to PHY %s failed (rc=%d)", + __func__, pptr->path, rc); + + pmcs_handle_ds_recovery_error(pptr, tgt, pwp, + __func__, __LINE__, "HARD_RESET"); + + goto next_phy; + } + + /* + * Step 3: Abort all I/Os to the device + */ + if (pptr->abort_all_start) { + while (pptr->abort_all_start) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: Waiting for outstanding ABORT_ALL on " + "PHY 0x%p", __func__, (void *)pptr); + cv_wait(&pptr->abort_all_cv, &pptr->phy_lock); + } + } else { + if (tgt != NULL) { + mutex_exit(&tgt->statlock); + } + rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1); + if (tgt != NULL) { + mutex_enter(&tgt->statlock); + } + if (rc != 0) { + pptr->abort_pending = 1; + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: pmcs_abort to PHY %s failed (rc=%d)", + __func__, pptr->path, rc); + + pmcs_handle_ds_recovery_error(pptr, tgt, + pwp, __func__, __LINE__, "pmcs_abort"); + + goto next_phy; + } + } + + /* + * Step 4: Set the device back to OPERATIONAL state + */ + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, + "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state", + __func__, (void *)pptr, (void *)tgt); + rc = pmcs_set_dev_state(pwp, pptr, tgt, + PMCS_DEVICE_STATE_OPERATIONAL); + if (rc == 0) { + if (tgt != NULL) { + tgt->recover_wait = 0; + } + pptr->ds_recovery_retries = 0; + + if ((pptr->ds_prev_good_recoveries == 0) || + (ddi_get_lbolt() - pptr->last_good_recovery > + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) { + pptr->last_good_recovery = ddi_get_lbolt(); + pptr->ds_prev_good_recoveries = 1; + } else if (ddi_get_lbolt() < pptr->last_good_recovery + + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) { + pptr->ds_prev_good_recoveries++; + } else { + pmcs_handle_ds_recovery_error(pptr, tgt, pwp, + __func__, __LINE__, "Max recovery" + "attempts reached. Declaring PHY dead"); + } + + /* + * Don't bother to run the work queues if the PHY + * is dead. + */ + if (tgt && tgt->phy && !tgt->phy->dead) { + SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES); + (void) ddi_taskq_dispatch(pwp->tq, pmcs_worker, + pwp, DDI_NOSLEEP); + } + } else { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, + "%s: Failed to SET tgt 0x%p to OPERATIONAL state", + __func__, (void *)tgt); + + pmcs_handle_ds_recovery_error(pptr, tgt, pwp, + __func__, __LINE__, "SET tgt to OPERATIONAL state"); + + goto next_phy; + } + +next_phy: + if (tgt) { + mutex_exit(&tgt->statlock); + } + pmcs_unlock_phy(pptr); + pptr = pnext; + } + + /* + * Only clear ds_err_recovering if we're exiting for good and not + * just unwinding from recursion + */ + if (phyp == NULL) { + mutex_enter(&pwp->lock); + pwp->ds_err_recovering = 0; + mutex_exit(&pwp->lock); + } +} + +/* + * Called with target's statlock held (if target is non-NULL) and PHY lock held. + */ +int +pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_phy_t *phyp, + pmcs_xscsi_t *tgt) +{ + int rc = -1; + uint8_t tgt_dev_state = PMCS_DEVICE_STATE_NOT_AVAILABLE; + + if (tgt != NULL) { + ASSERT(mutex_owned(&tgt->statlock)); + if (tgt->recovering) { + return (0); + } + + tgt->recovering = 1; + tgt_dev_state = tgt->dev_state; + } + + if (phyp == NULL) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, tgt, + "%s: PHY is NULL", __func__); + return (-1); + } + + ASSERT(mutex_owned(&phyp->phy_lock)); + + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s: ds: 0x%x, tgt ds(0x%x)", __func__, dev_state, tgt_dev_state); + + switch (dev_state) { + case PMCS_DEVICE_STATE_IN_RECOVERY: + if (tgt_dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s: Target 0x%p already IN_RECOVERY", __func__, + (void *)tgt); + rc = 0; /* This is not an error */ + goto no_action; + } + + rc = pmcs_set_dev_state(pwp, phyp, tgt, + PMCS_DEVICE_STATE_IN_RECOVERY); + if (rc != 0) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s(1): Failed to set tgt(0x%p) to IN_RECOVERY", + __func__, (void *)tgt); + } + + break; + + case PMCS_DEVICE_STATE_OPERATIONAL: + if (tgt_dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s: Target 0x%p not ready to go OPERATIONAL", + __func__, (void *)tgt); + goto no_action; + } + + rc = pmcs_set_dev_state(pwp, phyp, tgt, + PMCS_DEVICE_STATE_OPERATIONAL); + if (tgt != NULL) { + tgt->reset_success = 1; + } + if (rc != 0) { + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL", + __func__, (void *)tgt); + if (tgt != NULL) { + tgt->reset_success = 0; + } + } + + break; + + case PMCS_DEVICE_STATE_NON_OPERATIONAL: + PHY_CHANGED(pwp, phyp); + RESTART_DISCOVERY(pwp); + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s: Device at %s is non-operational", + __func__, phyp->path); + if (tgt != NULL) { + tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL; + } + rc = 0; + + break; + + default: + pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, tgt, + "%s: Invalid state requested (%d)", __func__, + dev_state); + break; + + } + +no_action: + if (tgt != NULL) { + tgt->recovering = 0; + } + return (rc); +} + +/* + * Start ssp event recovery. We have to schedule recovery operation because + * it involves sending multiple commands to device and we should not do it + * in the interrupt context. + * If it is failure of a recovery command, let the recovery thread deal with it. + * Called with pmcwork lock held. + */ + +void +pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb, + size_t amt) +{ + pmcs_xscsi_t *tgt = pwrk->xp; + uint32_t event = LE_32(iomb[2]); + pmcs_phy_t *pptr = pwrk->phy; + uint32_t tag; + + if (tgt != NULL) { + mutex_enter(&tgt->statlock); + if (!tgt->assigned) { + if (pptr) { + pmcs_dec_phy_ref_count(pptr); + } + pptr = NULL; + pwrk->phy = NULL; + } + mutex_exit(&tgt->statlock); + } + if (pptr == NULL) { + /* + * No target, need to run RE-DISCOVERY here. + */ + if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) { + pwrk->state = PMCS_WORK_STATE_INTR; + } + /* + * Although we cannot mark phy to force abort nor mark phy + * as changed, killing of a target would take care of aborting + * commands for the device. + */ + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: No valid target for event processing. Reconfigure.", + __func__); + pmcs_pwork(pwp, pwrk); + RESTART_DISCOVERY(pwp); + return; + } else { + pmcs_lock_phy(pptr); + mutex_enter(&tgt->statlock); + if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) { + if (tgt->dev_state != + PMCS_DEVICE_STATE_NON_OPERATIONAL) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: Device at %s is non-operational", + __func__, pptr->path); + tgt->dev_state = + PMCS_DEVICE_STATE_NON_OPERATIONAL; + } + pptr->abort_pending = 1; + mutex_exit(&tgt->statlock); + pmcs_unlock_phy(pptr); + mutex_exit(&pwrk->lock); + SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE); + RESTART_DISCOVERY(pwp); + return; + } + + /* + * If this command is run in WAIT mode, it is a failing recovery + * command. If so, just wake up recovery thread waiting for + * command completion. + */ + tag = PMCS_TAG_TYPE(pwrk->htag); + if (tag == PMCS_TAG_TYPE_WAIT) { + pwrk->htag |= PMCS_TAG_DONE; + if (pwrk->arg && amt) { + (void) memcpy(pwrk->arg, iomb, amt); + } + cv_signal(&pwrk->sleep_cv); + mutex_exit(&tgt->statlock); + pmcs_unlock_phy(pptr); + mutex_exit(&pwrk->lock); + return; + } + + /* + * To recover from primary failures, + * we need to schedule handling events recovery. + */ + tgt->event_recovery = 1; + mutex_exit(&tgt->statlock); + pmcs_unlock_phy(pptr); + pwrk->ssp_event = event; + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: Scheduling SSP event recovery for tgt(0x%p) " + "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk, + pwrk->htag); + mutex_exit(&pwrk->lock); + SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY); + } + + /* Work cannot be completed until event recovery is completed. */ +} + +/* + * SSP target event recovery + * Entered with a phy lock held + * Pwrk lock is not needed - pwrk is on the target aq and no other thread + * will do anything with it until this thread starts the chain of recovery. + * Statlock may be acquired and released. + */ + +void +pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk) +{ + pmcs_phy_t *pptr = pwrk->phy; + pmcs_cmd_t *sp = pwrk->arg; + pmcs_lun_t *lun = sp->cmd_lun; + pmcs_xscsi_t *tgt = pwrk->xp; + uint32_t event; + uint32_t htag; + uint32_t status; + uint8_t dstate; + int rv; + + ASSERT(pwrk->arg != NULL); + ASSERT(pwrk->xp != NULL); + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: event recovery for target 0x%p", __func__, (void *)pwrk->xp); + htag = pwrk->htag; + event = pwrk->ssp_event; + pwrk->ssp_event = 0xffffffff; + if (event == PMCOUT_STATUS_XFER_ERR_BREAK || + event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY || + event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) { + /* Command may be still pending on device */ + rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag, + lun->lun_num, &status); + if (rv != 0) { + goto out; + } + if (status == SAS_RSP_TMF_COMPLETE) { + /* Command NOT pending on a device */ + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: No pending command for tgt 0x%p", + __func__, (void *)tgt); + /* Nothing more to do, just abort it on chip */ + htag = 0; + } + } + /* + * All other events left the command pending in the host + * Send abort task and abort it on the chip + */ + if (htag != 0) { + if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag, + lun->lun_num, &status)) + goto out; + } + (void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1); + /* + * Abort either took care of work completion, or put device in + * a recovery state + */ + return; +out: + /* Abort failed, do full device recovery */ + mutex_enter(&tgt->statlock); + if (!pmcs_get_dev_state(pwp, pptr, tgt, &dstate)) + tgt->dev_state = dstate; + + if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) && + (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, + "%s: Setting IN_RECOVERY for tgt 0x%p", + __func__, (void *)tgt); + (void) pmcs_send_err_recovery_cmd(pwp, + PMCS_DEVICE_STATE_IN_RECOVERY, pptr, tgt); + } + mutex_exit(&tgt->statlock); +} + +/* + * SSP event recovery task. + */ +void +pmcs_ssp_event_recovery(pmcs_hw_t *pwp) +{ + int idx; + pmcs_xscsi_t *tgt; + pmcs_cmd_t *cp; + pmcwork_t *pwrk; + pmcs_phy_t *pphy; + int er_flag; + uint32_t idxpwrk; + +restart: + for (idx = 0; idx < pwp->max_dev; idx++) { + mutex_enter(&pwp->lock); + tgt = pwp->targets[idx]; + mutex_exit(&pwp->lock); + if (tgt != NULL) { + mutex_enter(&tgt->statlock); + if (!tgt->assigned) { + mutex_exit(&tgt->statlock); + continue; + } + pphy = tgt->phy; + er_flag = tgt->event_recovery; + mutex_exit(&tgt->statlock); + if (pphy != NULL && er_flag != 0) { + pmcs_lock_phy(pphy); + mutex_enter(&tgt->statlock); + pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt, + "%s: found target(0x%p)", __func__, + (void *) tgt); + + /* Check what cmd expects recovery */ + mutex_enter(&tgt->aqlock); + STAILQ_FOREACH(cp, &tgt->aq, cmd_next) { + /* + * Since work structure is on this + * target aq, and only this thread + * is accessing it now, we do not need + * to lock it + */ + idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag); + pwrk = &pwp->work[idxpwrk]; + if (pwrk->htag != cp->cmd_tag) { + /* + * aq may contain TMF commands, + * so we may not find work + * structure with htag + */ + break; + } + if (pwrk->ssp_event != 0 && + pwrk->ssp_event != + PMCS_REC_EVENT) { + pmcs_prt(pwp, + PMCS_PRT_DEBUG, pphy, tgt, + "%s: pwrk(%p) ctag(0x%x)", + __func__, (void *) pwrk, + cp->cmd_tag); + mutex_exit(&tgt->aqlock); + mutex_exit(&tgt->statlock); + pmcs_tgt_event_recovery( + pwp, pwrk); + /* + * We dropped statlock, so + * restart scanning from scratch + */ + pmcs_unlock_phy(pphy); + goto restart; + } + } + mutex_exit(&tgt->aqlock); + tgt->event_recovery = 0; + pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt, + "%s: end of SSP event recovery for " + "target(0x%p)", __func__, (void *) tgt); + mutex_exit(&tgt->statlock); + pmcs_unlock_phy(pphy); + } + } + } + pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, + "%s: end of SSP event recovery for pwp(0x%p)", __func__, + (void *) pwp); +} + +void +pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp) +{ + ASSERT(mutex_owned(&xp->statlock)); + ASSERT(xp->pwp != NULL); + + if (xp->recover_wait == 0) { + pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, + "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)", + __func__, (void *)xp, (void *)phyp, phyp->path); + xp->recover_wait = 1; + + /* + * Rather than waiting for the watchdog timer, we'll + * kick it right now. + */ + SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY); + (void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp, + DDI_NOSLEEP); + } +} + +/* + * Increment the phy ds error retry count. + * If too many retries, mark phy dead and restart discovery; + * otherwise schedule ds recovery. + */ +static void +pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt, + pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string) +{ + ASSERT(mutex_owned(&phyp->phy_lock)); + ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock)); + + phyp->ds_recovery_retries++; + + if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, + "%s: retry limit reached after %s to PHY %s failed", + func_name, reason_string, phyp->path); + if (tgt != NULL) { + tgt->recover_wait = 0; + } + phyp->dead = 1; + PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line); + RESTART_DISCOVERY(pwp); + } else if ((phyp->ds_prev_good_recoveries > + PMCS_MAX_DS_RECOVERY_RETRIES) && + (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME) + < ddi_get_lbolt())) { + pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of " + "successful recoveries reached, declaring PHY %s dead", + __func__, phyp->path); + if (tgt != NULL) { + tgt->recover_wait = 0; + } + phyp->dead = 1; + PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line); + RESTART_DISCOVERY(pwp); + } else { + SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY); + } +}
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_intr.c Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_intr.c Wed Nov 18 07:29:02 2009 -0700 @@ -342,12 +342,13 @@ pptr = pwp->root_phys + phynum; pmcs_lock_phy(pptr); + /* + * No need to lock the primary root PHY. It can never go + * away, and we're only concerned with the port width and + * the portid, both of which only ever change in this function. + */ rp = pwp->ports[portid]; - /* rp and pptr may be the same */ - if (rp && (rp != pptr)) { - pmcs_lock_phy(rp); - } mutex_exit(&pwp->lock); pmcs_endian_transform(pwp, &af, &((uint32_t *)iomb)[4], @@ -361,9 +362,6 @@ */ if (rp) { if (rp->portid != portid) { - if (rp != pptr) { - pmcs_unlock_phy(rp); - } pmcs_unlock_phy(pptr); pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, "PortID 0x%x: PHY 0x%x SAS LINK UP IS FOR " @@ -385,10 +383,7 @@ pmcs_link_rate(IOP_EVENT_LINK_RATE(w1)); pptr->portid = portid; pptr->dead = 0; - - if (pptr != rp) { - pmcs_unlock_phy(pptr); - } + pmcs_unlock_phy(pptr); rp->width = IOP_EVENT_NPIP(w3); @@ -404,10 +399,9 @@ } /* Get our iport, if attached, and set it up */ - if (pptr != rp) { - pmcs_lock_phy(pptr); - } + pmcs_lock_phy(pptr); iport = pmcs_get_iport_by_phy(pwp, pptr); + pmcs_unlock_phy(pptr); if (iport) { pptr->iport = iport; primary = !pptr->subsidiary; @@ -427,10 +421,6 @@ "PortID 0x%x: PHY 0x%x SAS LINK UP WIDENS PORT " "TO %d PHYS", portid, phynum, rp->width); - if (pptr != rp) { - pmcs_unlock_phy(pptr); - } - pmcs_unlock_phy(rp); break; } @@ -517,6 +507,7 @@ /* Get a pointer to our iport and set it up if attached */ pmcs_lock_phy(pptr); iport = pmcs_get_iport_by_phy(pwp, pptr); + pmcs_unlock_phy(pptr); if (iport) { pptr->iport = iport; primary = !pptr->subsidiary; @@ -532,6 +523,7 @@ pmcs_rele_iport(iport); } + pmcs_lock_phy(pptr); pmcs_smhba_log_sysevent(pwp, ESC_SAS_PHY_EVENT, SAS_PHY_ONLINE, pptr); pmcs_unlock_phy(pptr); @@ -610,6 +602,7 @@ /* Get our iport, if attached, and set it up */ pmcs_lock_phy(pptr); iport = pmcs_get_iport_by_phy(pwp, pptr); + pmcs_unlock_phy(pptr); if (iport) { pptr->iport = iport; @@ -624,6 +617,7 @@ pmcs_rele_iport(iport); } + pmcs_lock_phy(pptr); pmcs_smhba_log_sysevent(pwp, ESC_SAS_PHY_EVENT, SAS_PHY_ONLINE, pptr); pmcs_unlock_phy(pptr);
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_smhba.c Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_smhba.c Wed Nov 18 07:29:02 2009 -0700 @@ -58,11 +58,15 @@ } +/* + * Called with iport lock held. + */ void pmcs_smhba_add_iport_prop(pmcs_iport_t *iport, data_type_t dt, char *prop_name, void *prop_val) { ASSERT(iport != NULL); + ASSERT(mutex_owned(&iport->lock)); switch (dt) { case DATA_TYPE_INT32: @@ -84,6 +88,8 @@ "Unhandled datatype(%d) for(%s). Skipping prop update.", __func__, dt, prop_name); } + + pmcs_smhba_set_phy_props(iport); } @@ -157,9 +163,8 @@ nvlist_t **phy_props; nvlist_t *nvl; - mutex_enter(&iport->lock); + ASSERT(mutex_owned(&iport->lock)); if (iport->nphy == 0) { - mutex_exit(&iport->lock); return; } @@ -209,7 +214,6 @@ } nvlist_free(nvl); kmem_free(phy_props, sizeof (nvlist_t *) * iport->nphy); - mutex_exit(&iport->lock); kmem_free(packed_data, packed_size); }
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c Wed Nov 18 07:29:02 2009 -0700 @@ -64,9 +64,6 @@ static void pmcs_reap_dead_phy(pmcs_phy_t *); static pmcs_iport_t *pmcs_get_iport_by_ua(pmcs_hw_t *, char *); static boolean_t pmcs_phy_target_match(pmcs_phy_t *); -static void pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, - pmcs_xscsi_t *tgt, pmcs_hw_t *pwp, const char *func_name, int line, - char *reason_string); /* * Often used strings @@ -1972,8 +1969,8 @@ /* * Finally, insert the phy into our list */ + pmcs_unlock_phy(pptr); pmcs_add_phy_to_iport(iport, pptr); - pmcs_unlock_phy(pptr); pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL, "%s: found " "phy %d [0x%p] on iport%d, refcnt(%d)", __func__, phynum, @@ -2518,9 +2515,6 @@ */ for (iport = list_head(&pwp->iports); iport != NULL; iport = list_next(&pwp->iports, iport)) { - /* Set up the DDI properties on each phy */ - pmcs_smhba_set_phy_props(iport); - /* Set up the 'attached-port' property on the iport */ ap = kmem_zalloc(PMCS_MAX_UA_SIZE, KM_SLEEP); mutex_enter(&iport->lock); @@ -2878,6 +2872,7 @@ pptr->ds_prev_good_recoveries = 0; pptr->last_good_recovery = 0; pptr->prev_recovery = 0; + /* keep dtype */ pptr->config_stop = 0; pptr->spinup_hold = 0; @@ -4543,7 +4538,7 @@ "%s: Trying DS error recovery for tgt 0x%p", __func__, (void *)tgt); (void) pmcs_send_err_recovery_cmd(pwp, - PMCS_DEVICE_STATE_IN_RECOVERY, tgt); + PMCS_DEVICE_STATE_IN_RECOVERY, pptr, tgt); } mutex_exit(&tgt->statlock); } @@ -4583,7 +4578,7 @@ "%s: Restoring OPERATIONAL dev_state for tgt 0x%p", __func__, (void *)tgt); (void) pmcs_send_err_recovery_cmd(pwp, - PMCS_DEVICE_STATE_OPERATIONAL, tgt); + PMCS_DEVICE_STATE_OPERATIONAL, pptr, tgt); } mutex_exit(&tgt->statlock); } @@ -4719,7 +4714,8 @@ " for tgt 0x%p (status = %s)", __func__, (void *)xp, pmcs_status_str(status)); - (void) pmcs_send_err_recovery_cmd(pwp, ds, xp); + (void) pmcs_send_err_recovery_cmd(pwp, ds, + pptr, xp); } mutex_exit(&xp->statlock); } @@ -4734,7 +4730,8 @@ " for tgt 0x%p (status = %s)", __func__, (void *)xp, pmcs_status_str(status)); - (void) pmcs_send_err_recovery_cmd(pwp, ds, xp); + (void) pmcs_send_err_recovery_cmd(pwp, ds, + pptr, xp); } mutex_exit(&xp->statlock); } @@ -4897,7 +4894,7 @@ pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, "%s: Trying " "SATA DS Recovery for tgt(0x%p) for status(%s)", __func__, (void *)tgt, pmcs_status_str(status)); - (void) pmcs_send_err_recovery_cmd(pwp, ds, tgt); + (void) pmcs_send_err_recovery_cmd(pwp, ds, pptr, tgt); } mutex_exit(&tgt->statlock); @@ -6768,497 +6765,6 @@ } /* - * Get device state. Called with statlock and PHY lock held. - */ -int -pmcs_get_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t *ds) -{ - uint32_t htag, *ptr, msg[PMCS_MSG_SIZE]; - int result; - struct pmcwork *pwrk; - pmcs_phy_t *phyp; - - pmcs_prt(pwp, PMCS_PRT_DEBUG3, NULL, xp, "%s: tgt(0x%p)", __func__, - (void *)xp); - if (xp == NULL) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, xp, - "%s: Target is NULL", __func__); - return (-1); - } - - ASSERT(mutex_owned(&xp->statlock)); - phyp = xp->phy; - ASSERT(mutex_owned(&phyp->phy_lock)); - - pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp); - if (pwrk == NULL) { - pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__); - return (-1); - } - pwrk->arg = msg; - pwrk->dtype = phyp->dtype; - - if (phyp->valid_device_id == 0) { - pmcs_pwork(pwp, pwrk); - pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp, - "%s: Invalid DeviceID", __func__); - return (-1); - } - htag = pwrk->htag; - msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, - PMCIN_GET_DEVICE_STATE)); - msg[1] = LE_32(pwrk->htag); - msg[2] = LE_32(phyp->device_id); - - mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]); - ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER); - if (ptr == NULL) { - mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]); - pmcs_pwork(pwp, pwrk); - pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__); - return (-1); - } - COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE); - pwrk->state = PMCS_WORK_STATE_ONCHIP; - INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER); - mutex_exit(&xp->statlock); - pmcs_unlock_phy(phyp); - WAIT_FOR(pwrk, 1000, result); - pmcs_lock_phy(phyp); - pmcs_pwork(pwp, pwrk); - mutex_enter(&xp->statlock); - - if (result) { - pmcs_timed_out(pwp, htag, __func__); - pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, xp, - "%s: cmd timed out, returning ", __func__); - return (-1); - } - if (LE_32(msg[2]) == 0) { - *ds = (uint8_t)(LE_32(msg[4])); - if (*ds != xp->dev_state) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: retrieved_ds=0x%x, target_ds=0x%x", __func__, - *ds, xp->dev_state); - } - return (0); - } else { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: cmd failed Status(0x%x), returning ", __func__, - LE_32(msg[2])); - return (-1); - } -} - -/* - * Set device state. Called with target's statlock and PHY lock held. - */ -int -pmcs_set_dev_state(pmcs_hw_t *pwp, pmcs_xscsi_t *xp, uint8_t ds) -{ - uint32_t htag, *ptr, msg[PMCS_MSG_SIZE]; - int result; - uint8_t pds, nds; - struct pmcwork *pwrk; - pmcs_phy_t *phyp; - - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, NULL, xp, - "%s: ds(0x%x), tgt(0x%p)", __func__, ds, (void *)xp); - if (xp == NULL) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, xp, - "%s: Target is Null", __func__); - return (-1); - } - - phyp = xp->phy; - pwrk = pmcs_gwork(pwp, PMCS_TAG_TYPE_WAIT, phyp); - if (pwrk == NULL) { - pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nowrk, __func__); - return (-1); - } - if (phyp == NULL) { - pmcs_pwork(pwp, pwrk); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: PHY is Null", __func__); - return (-1); - } - if (phyp->valid_device_id == 0) { - pmcs_pwork(pwp, pwrk); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: Invalid DeviceID", __func__); - return (-1); - } - pwrk->arg = msg; - pwrk->dtype = phyp->dtype; - htag = pwrk->htag; - msg[0] = LE_32(PMCS_HIPRI(pwp, PMCS_OQ_GENERAL, - PMCIN_SET_DEVICE_STATE)); - msg[1] = LE_32(pwrk->htag); - msg[2] = LE_32(phyp->device_id); - msg[3] = LE_32(ds); - - mutex_enter(&pwp->iqp_lock[PMCS_IQ_OTHER]); - ptr = GET_IQ_ENTRY(pwp, PMCS_IQ_OTHER); - if (ptr == NULL) { - mutex_exit(&pwp->iqp_lock[PMCS_IQ_OTHER]); - pmcs_pwork(pwp, pwrk); - pmcs_prt(pwp, PMCS_PRT_ERR, phyp, xp, pmcs_nomsg, __func__); - return (-1); - } - COPY_MESSAGE(ptr, msg, PMCS_MSG_SIZE); - pwrk->state = PMCS_WORK_STATE_ONCHIP; - INC_IQ_ENTRY(pwp, PMCS_IQ_OTHER); - - mutex_exit(&xp->statlock); - pmcs_unlock_phy(phyp); - WAIT_FOR(pwrk, 1000, result); - pmcs_lock_phy(phyp); - pmcs_pwork(pwp, pwrk); - mutex_enter(&xp->statlock); - - if (result) { - pmcs_timed_out(pwp, htag, __func__); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: cmd timed out, returning", __func__); - return (-1); - } - if (LE_32(msg[2]) == 0) { - pds = (uint8_t)(LE_32(msg[4]) >> 4); - nds = (uint8_t)(LE_32(msg[4]) & 0x0000000f); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: previous_ds=0x%x, new_ds=0x%x", __func__, pds, nds); - xp->dev_state = nds; - return (0); - } else { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: cmd failed Status(0x%x), returning ", __func__, - LE_32(msg[2])); - return (-1); - } -} - -void -pmcs_dev_state_recovery(pmcs_hw_t *pwp, pmcs_phy_t *phyp) -{ - uint8_t ds; - int rc; - pmcs_xscsi_t *tgt; - pmcs_phy_t *pptr, *pnext, *pchild; - - /* - * First time, check to see if we're already performing recovery - */ - if (phyp == NULL) { - mutex_enter(&pwp->lock); - if (pwp->ds_err_recovering) { - mutex_exit(&pwp->lock); - SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY); - return; - } - - pwp->ds_err_recovering = 1; - pptr = pwp->root_phys; - mutex_exit(&pwp->lock); - } else { - pptr = phyp; - } - - while (pptr) { - /* - * Since ds_err_recovering is set, we can be assured these - * PHYs won't disappear on us while we do this. - */ - pmcs_lock_phy(pptr); - pchild = pptr->children; - pnext = pptr->sibling; - pmcs_unlock_phy(pptr); - - if (pchild) { - pmcs_dev_state_recovery(pwp, pchild); - } - - tgt = NULL; - pmcs_lock_phy(pptr); - - if (pptr->dead) { - goto next_phy; - } - - tgt = pptr->target; - - if (tgt != NULL) { - mutex_enter(&tgt->statlock); - if (tgt->recover_wait == 0) { - goto next_phy; - } - } - - if (pptr->prev_recovery) { - if (ddi_get_lbolt() - pptr->prev_recovery < - drv_usectohz(PMCS_DS_RECOVERY_INTERVAL)) { - pmcs_prt(pwp, PMCS_PRT_DEBUG2, pptr, tgt, - "%s: DS recovery on PHY %s " - "re-invoked too soon. Skipping...", - __func__, pptr->path); - goto next_phy; - } - } - pptr->prev_recovery = ddi_get_lbolt(); - - /* - * Step 1: Put the device into the IN_RECOVERY state - */ - rc = pmcs_get_dev_state(pwp, tgt, &ds); - if (rc != 0) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: pmcs_get_dev_state on PHY %s " - "failed (rc=%d)", - __func__, pptr->path, rc); - - pmcs_handle_ds_recovery_error(pptr, tgt, pwp, - __func__, __LINE__, "pmcs_get_dev_state"); - - goto next_phy; - } - - if ((tgt->dev_state == ds) && - (ds == PMCS_DEVICE_STATE_IN_RECOVERY)) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Target 0x%p already IN_RECOVERY", __func__, - (void *)tgt); - } else { - tgt->dev_state = ds; - ds = PMCS_DEVICE_STATE_IN_RECOVERY; - rc = pmcs_send_err_recovery_cmd(pwp, ds, tgt); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: pmcs_send_err_recovery_cmd " - "result(%d) tgt(0x%p) ds(0x%x) tgt->ds(0x%x)", - __func__, rc, (void *)tgt, ds, tgt->dev_state); - - if (rc) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: pmcs_send_err_recovery_cmd to PHY %s " - "failed (rc=%d)", - __func__, pptr->path, rc); - - pmcs_handle_ds_recovery_error(pptr, tgt, pwp, - __func__, __LINE__, - "pmcs_send_err_recovery_cmd"); - - goto next_phy; - } - } - - /* - * Step 2: Perform a hard reset on the PHY - */ - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Issue HARD_RESET to PHY %s", __func__, pptr->path); - /* - * Must release statlock here because pmcs_reset_phy will - * drop and reacquire the PHY lock. - */ - mutex_exit(&tgt->statlock); - rc = pmcs_reset_phy(pwp, pptr, PMCS_PHYOP_HARD_RESET); - mutex_enter(&tgt->statlock); - if (rc) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: HARD_RESET to PHY %s failed (rc=%d)", - __func__, pptr->path, rc); - - pmcs_handle_ds_recovery_error(pptr, tgt, pwp, - __func__, __LINE__, "HARD_RESET"); - - goto next_phy; - } - - /* - * Step 3: Abort all I/Os to the device - */ - if (pptr->abort_all_start) { - while (pptr->abort_all_start) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: Waiting for outstanding ABORT_ALL on " - "PHY 0x%p", __func__, (void *)pptr); - cv_wait(&pptr->abort_all_cv, &pptr->phy_lock); - } - } else { - mutex_exit(&tgt->statlock); - rc = pmcs_abort(pwp, pptr, pptr->device_id, 1, 1); - mutex_enter(&tgt->statlock); - if (rc != 0) { - pptr->abort_pending = 1; - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: pmcs_abort to PHY %s failed (rc=%d)", - __func__, pptr->path, rc); - - pmcs_handle_ds_recovery_error(pptr, tgt, - pwp, __func__, __LINE__, "pmcs_abort"); - - goto next_phy; - } - } - - /* - * Step 4: Set the device back to OPERATIONAL state - */ - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Set PHY/tgt 0x%p/0x%p to OPERATIONAL state", - __func__, (void *)pptr, (void *)tgt); - rc = pmcs_set_dev_state(pwp, tgt, - PMCS_DEVICE_STATE_OPERATIONAL); - if (rc == 0) { - tgt->recover_wait = 0; - - pptr->ds_recovery_retries = 0; - if ((pptr->ds_prev_good_recoveries == 0) || - (ddi_get_lbolt() - pptr->last_good_recovery > - drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME))) { - pptr->last_good_recovery = ddi_get_lbolt(); - pptr->ds_prev_good_recoveries = 1; - } else if (ddi_get_lbolt() < pptr->last_good_recovery + - drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME)) { - pptr->ds_prev_good_recoveries++; - } else { - pmcs_handle_ds_recovery_error(pptr, tgt, pwp, - __func__, __LINE__, "Max recovery" - "attempts reached. Declaring PHY dead"); - } - - /* - * Don't bother to run the work queues if the PHY - * is dead. - */ - if (tgt->phy && !tgt->phy->dead) { - SCHEDULE_WORK(pwp, PMCS_WORK_RUN_QUEUES); - (void) ddi_taskq_dispatch(pwp->tq, pmcs_worker, - pwp, DDI_NOSLEEP); - } - } else { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Failed to SET tgt 0x%p to OPERATIONAL state", - __func__, (void *)tgt); - - pmcs_handle_ds_recovery_error(pptr, tgt, pwp, - __func__, __LINE__, "SET tgt to OPERATIONAL state"); - - goto next_phy; - } - -next_phy: - if (tgt) { - mutex_exit(&tgt->statlock); - } - pmcs_unlock_phy(pptr); - pptr = pnext; - } - - /* - * Only clear ds_err_recovering if we're exiting for good and not - * just unwinding from recursion - */ - if (phyp == NULL) { - mutex_enter(&pwp->lock); - pwp->ds_err_recovering = 0; - mutex_exit(&pwp->lock); - } -} - -/* - * Called with target's statlock and PHY lock held. - */ -int -pmcs_send_err_recovery_cmd(pmcs_hw_t *pwp, uint8_t dev_state, pmcs_xscsi_t *tgt) -{ - pmcs_phy_t *pptr; - int rc = -1; - - ASSERT(tgt != NULL); - ASSERT(mutex_owned(&tgt->statlock)); - - if (tgt->recovering) { - return (0); - } - - tgt->recovering = 1; - pptr = tgt->phy; - - if (pptr == NULL) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: PHY is Null", __func__); - return (-1); - } - - ASSERT(mutex_owned(&pptr->phy_lock)); - - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: ds: 0x%x, tgt ds(0x%x)", __func__, dev_state, tgt->dev_state); - - switch (dev_state) { - case PMCS_DEVICE_STATE_IN_RECOVERY: - if (tgt->dev_state == PMCS_DEVICE_STATE_IN_RECOVERY) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Target 0x%p already IN_RECOVERY", __func__, - (void *)tgt); - rc = 0; /* This is not an error */ - goto no_action; - } - - rc = pmcs_set_dev_state(pwp, tgt, - PMCS_DEVICE_STATE_IN_RECOVERY); - if (rc != 0) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s(1): Failed to SET tgt(0x%p) to _IN_RECOVERY", - __func__, (void *)tgt); - } - - break; - - case PMCS_DEVICE_STATE_OPERATIONAL: - if (tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Target 0x%p not ready to go OPERATIONAL", - __func__, (void *)tgt); - goto no_action; - } - - rc = pmcs_set_dev_state(pwp, tgt, - PMCS_DEVICE_STATE_OPERATIONAL); - tgt->reset_success = 1; - if (rc != 0) { - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s(2): Failed to SET tgt(0x%p) to OPERATIONAL", - __func__, (void *)tgt); - tgt->reset_success = 0; - } - - break; - - case PMCS_DEVICE_STATE_NON_OPERATIONAL: - PHY_CHANGED(pwp, pptr); - RESTART_DISCOVERY(pwp); - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Device at %s is non-operational", - __func__, pptr->path); - tgt->dev_state = PMCS_DEVICE_STATE_NON_OPERATIONAL; - rc = 0; - - break; - - default: - pmcs_prt(pwp, PMCS_PRT_DEBUG_DEV_STATE, pptr, tgt, - "%s: Invalid state requested (%d)", __func__, - dev_state); - break; - - } - -no_action: - tgt->recovering = 0; - return (rc); -} - -/* * pmcs_lock_phy_impl * * This function is what does the actual work for pmcs_lock_phy. It will @@ -7478,277 +6984,6 @@ } } - -/* - * Start ssp event recovery. We have to schedule recovery operation because - * it involves sending multiple commands to device and we should not do it - * in the interrupt context. - * If it is failure of a recovery command, let the recovery thread deal with it. - * Called with pmcwork lock held. - */ - -void -pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb, - size_t amt) -{ - pmcs_xscsi_t *tgt = pwrk->xp; - uint32_t event = LE_32(iomb[2]); - pmcs_phy_t *pptr = pwrk->phy; - uint32_t tag; - - if (tgt != NULL) { - mutex_enter(&tgt->statlock); - if (!tgt->assigned) { - if (pptr) { - pmcs_dec_phy_ref_count(pptr); - } - pptr = NULL; - pwrk->phy = NULL; - } - mutex_exit(&tgt->statlock); - } - if (pptr == NULL) { - /* - * No target, need to run RE-DISCOVERY here. - */ - if (pwrk->state != PMCS_WORK_STATE_TIMED_OUT) { - pwrk->state = PMCS_WORK_STATE_INTR; - } - /* - * Although we cannot mark phy to force abort nor mark phy - * as changed, killing of a target would take care of aborting - * commands for the device. - */ - pmcs_prt(pwp, PMCS_PRT_DEBUG, NULL, NULL, - "%s: No valid target for event processing found. " - "Scheduling RECONFIGURE", __func__); - pmcs_pwork(pwp, pwrk); - RESTART_DISCOVERY(pwp); - return; - } else { - pmcs_lock_phy(pptr); - mutex_enter(&tgt->statlock); - if (event == PMCOUT_STATUS_OPEN_CNX_ERROR_IT_NEXUS_LOSS) { - if (tgt->dev_state != - PMCS_DEVICE_STATE_NON_OPERATIONAL) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: Device at %s is non-operational", - __func__, pptr->path); - tgt->dev_state = - PMCS_DEVICE_STATE_NON_OPERATIONAL; - } - pptr->abort_pending = 1; - mutex_exit(&tgt->statlock); - pmcs_unlock_phy(pptr); - mutex_exit(&pwrk->lock); - SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE); - RESTART_DISCOVERY(pwp); - return; - } - - /* - * If this command is run in WAIT mode, it is a failing recovery - * command. If so, just wake up recovery thread waiting for - * command completion. - */ - tag = PMCS_TAG_TYPE(pwrk->htag); - if (tag == PMCS_TAG_TYPE_WAIT) { - pwrk->htag |= PMCS_TAG_DONE; - if (pwrk->arg && amt) { - (void) memcpy(pwrk->arg, iomb, amt); - } - cv_signal(&pwrk->sleep_cv); - mutex_exit(&tgt->statlock); - pmcs_unlock_phy(pptr); - mutex_exit(&pwrk->lock); - return; - } - - /* - * To recover from primary failures, - * we need to schedule handling events recovery. - */ - tgt->event_recovery = 1; - mutex_exit(&tgt->statlock); - pmcs_unlock_phy(pptr); - pwrk->ssp_event = event; - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: Scheduling SSP event recovery for tgt(0x%p) " - "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk, - pwrk->htag); - mutex_exit(&pwrk->lock); - SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY); - } - - /* Work cannot be completed until event recovery is completed. */ -} - -/* - * SSP target event recovery - * Entered with a phy lock held - * Pwrk lock is not needed - pwrk is on the target aq and no other thread - * will do anything with it until this thread starts the chain of recovery. - * Statlock may be acquired and released. - */ - -void -pmcs_tgt_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk) -{ - pmcs_phy_t *pptr = pwrk->phy; - pmcs_cmd_t *sp = pwrk->arg; - pmcs_lun_t *lun = sp->cmd_lun; - pmcs_xscsi_t *tgt = pwrk->xp; - uint32_t event; - uint32_t htag; - uint32_t status; - uint8_t dstate; - int rv; - - ASSERT(pwrk->arg != NULL); - ASSERT(pwrk->xp != NULL); - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, "%s: event recovery for " - "target 0x%p", __func__, (void *)pwrk->xp); - htag = pwrk->htag; - event = pwrk->ssp_event; - pwrk->ssp_event = 0xffffffff; - if (event == PMCOUT_STATUS_XFER_ERR_BREAK || - event == PMCOUT_STATUS_XFER_ERR_PHY_NOT_READY || - event == PMCOUT_STATUS_XFER_ERROR_CMD_ISSUE_ACK_NAK_TIMEOUT) { - /* Command may be still pending on device */ - rv = pmcs_ssp_tmf(pwp, pptr, SAS_QUERY_TASK, htag, - lun->lun_num, &status); - if (rv != 0) { - goto out; - } - if (status == SAS_RSP_TMF_COMPLETE) { - /* Command NOT pending on a device */ - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: No pending command for tgt 0x%p", - __func__, (void *)tgt); - /* Nothing more to do, just abort it on chip */ - htag = 0; - } - } - /* - * All other events left the command pending in the host - * Send abort task and abort it on the chip - */ - if (htag != 0) { - if (pmcs_ssp_tmf(pwp, pptr, SAS_ABORT_TASK, htag, - lun->lun_num, &status)) - goto out; - } - (void) pmcs_abort(pwp, pptr, pwrk->htag, 0, 1); - /* - * Abort either took care of work completion, or put device in - * a recovery state - */ - return; -out: - /* Abort failed, do full device recovery */ - mutex_enter(&tgt->statlock); - if (!pmcs_get_dev_state(pwp, tgt, &dstate)) - tgt->dev_state = dstate; - - if ((tgt->dev_state != PMCS_DEVICE_STATE_IN_RECOVERY) && - (tgt->dev_state != PMCS_DEVICE_STATE_NON_OPERATIONAL)) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt, - "%s: Setting IN_RECOVERY for tgt 0x%p", - __func__, (void *)tgt); - (void) pmcs_send_err_recovery_cmd(pwp, - PMCS_DEVICE_STATE_IN_RECOVERY, tgt); - } - mutex_exit(&tgt->statlock); -} - -/* - * SSP event recovery task. - */ -void -pmcs_ssp_event_recovery(pmcs_hw_t *pwp) -{ - int idx; - pmcs_xscsi_t *tgt; - pmcs_cmd_t *cp; - pmcwork_t *pwrk; - pmcs_phy_t *pphy; - int er_flag; - uint32_t idxpwrk; - -restart: - for (idx = 0; idx < pwp->max_dev; idx++) { - mutex_enter(&pwp->lock); - tgt = pwp->targets[idx]; - mutex_exit(&pwp->lock); - if (tgt != NULL) { - mutex_enter(&tgt->statlock); - if (!tgt->assigned) { - mutex_exit(&tgt->statlock); - continue; - } - pphy = tgt->phy; - er_flag = tgt->event_recovery; - mutex_exit(&tgt->statlock); - if (pphy != NULL && er_flag != 0) { - pmcs_lock_phy(pphy); - mutex_enter(&tgt->statlock); - pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt, - "%s: found target(0x%p)", __func__, - (void *) tgt); - - /* Check what cmd expects recovery */ - mutex_enter(&tgt->aqlock); - STAILQ_FOREACH(cp, &tgt->aq, cmd_next) { - /* - * Since work structure is on this - * target aq, and only this thread - * is accessing it now, we do not need - * to lock it - */ - idxpwrk = PMCS_TAG_INDEX(cp->cmd_tag); - pwrk = &pwp->work[idxpwrk]; - if (pwrk->htag != cp->cmd_tag) { - /* - * aq may contain TMF commands, - * so we may not find work - * structure with htag - */ - break; - } - if (pwrk->ssp_event != 0 && - pwrk->ssp_event != - PMCS_REC_EVENT) { - pmcs_prt(pwp, - PMCS_PRT_DEBUG, pphy, tgt, - "%s: pwrk(%p) ctag(0x%x)", - __func__, (void *) pwrk, - cp->cmd_tag); - mutex_exit(&tgt->aqlock); - mutex_exit(&tgt->statlock); - pmcs_tgt_event_recovery( - pwp, pwrk); - /* - * We dropped statlock, so - * restart scanning from scratch - */ - pmcs_unlock_phy(pphy); - goto restart; - } - } - mutex_exit(&tgt->aqlock); - tgt->event_recovery = 0; - pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt, - "%s: end of SSP event recovery for " - "target(0x%p)", __func__, (void *) tgt); - mutex_exit(&tgt->statlock); - pmcs_unlock_phy(pphy); - } - } - } - pmcs_prt(pwp, PMCS_PRT_DEBUG, pphy, tgt, "%s: " - "end of SSP event recovery for pwp(0x%p)", __func__, (void *) pwp); -} - /*ARGSUSED2*/ int pmcs_phy_constructor(void *buf, void *arg, int kmflags) @@ -8176,9 +7411,9 @@ ASSERT(phyp); ASSERT(!list_link_active(&phyp->list_node)); iport->nphy++; + list_insert_tail(&iport->phys, phyp); pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS, &iport->nphy); - list_insert_tail(&iport->phys, phyp); mutex_enter(&iport->refcnt_lock); iport->refcnt++; mutex_exit(&iport->refcnt_lock); @@ -8215,9 +7450,9 @@ ASSERT(iport->nphy > 0); ASSERT(list_link_active(&phyp->list_node)); iport->nphy--; + list_remove(&iport->phys, phyp); pmcs_smhba_add_iport_prop(iport, DATA_TYPE_INT32, PMCS_NUM_PHYS, &iport->nphy); - list_remove(&iport->phys, phyp); pmcs_rele_iport(iport); } @@ -8250,67 +7485,3 @@ return (rval); } - -void -pmcs_start_dev_state_recovery(pmcs_xscsi_t *xp, pmcs_phy_t *phyp) -{ - ASSERT(mutex_owned(&xp->statlock)); - ASSERT(xp->pwp != NULL); - - if (xp->recover_wait == 0) { - pmcs_prt(xp->pwp, PMCS_PRT_DEBUG_DEV_STATE, phyp, xp, - "%s: Start ds_recovery for tgt 0x%p/PHY 0x%p (%s)", - __func__, (void *)xp, (void *)phyp, phyp->path); - xp->recover_wait = 1; - - /* - * Rather than waiting for the watchdog timer, we'll - * kick it right now. - */ - SCHEDULE_WORK(xp->pwp, PMCS_WORK_DS_ERR_RECOVERY); - (void) ddi_taskq_dispatch(xp->pwp->tq, pmcs_worker, xp->pwp, - DDI_NOSLEEP); - } -} - -/* - * Increment the phy ds error retry count. - * If too many retries, mark phy dead and restart discovery; - * otherwise schedule ds recovery. - */ -static void -pmcs_handle_ds_recovery_error(pmcs_phy_t *phyp, pmcs_xscsi_t *tgt, - pmcs_hw_t *pwp, const char *func_name, int line, char *reason_string) -{ - ASSERT(mutex_owned(&phyp->phy_lock)); - ASSERT((tgt == NULL) || mutex_owned(&tgt->statlock)); - - phyp->ds_recovery_retries++; - - if (phyp->ds_recovery_retries > PMCS_MAX_DS_RECOVERY_RETRIES) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, - "%s: retry limit reached after %s to PHY %s failed", - func_name, reason_string, phyp->path); - if (tgt != NULL) { - tgt->recover_wait = 0; - } - phyp->dead = 1; - PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line); - RESTART_DISCOVERY(pwp); - } else if ((phyp->ds_prev_good_recoveries > - PMCS_MAX_DS_RECOVERY_RETRIES) && - (phyp->last_good_recovery + drv_usectohz(PMCS_MAX_DS_RECOVERY_TIME) - < ddi_get_lbolt())) { - pmcs_prt(pwp, PMCS_PRT_DEBUG, phyp, tgt, "%s: max number of " - "successful recoveries reached, declaring PHY %s dead", - __func__, phyp->path); - if (tgt != NULL) { - tgt->recover_wait = 0; - } - phyp->dead = 1; - PHY_CHANGED_AT_LOCATION(pwp, phyp, func_name, line); - RESTART_DISCOVERY(pwp); - } else { - SCHEDULE_WORK(pwp, PMCS_WORK_DS_ERR_RECOVERY); - } -}
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_iomb.h Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_iomb.h Wed Nov 18 07:29:02 2009 -0700 @@ -468,6 +468,7 @@ /* * Device State definitions */ +#define PMCS_DEVICE_STATE_NOT_AVAILABLE 0x0 /* Unconfigured tgt */ #define PMCS_DEVICE_STATE_OPERATIONAL 0x1 #define PMCS_DEVICE_STATE_PORT_IN_RESET 0x2 #define PMCS_DEVICE_STATE_IN_RECOVERY 0x3
--- a/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h Wed Nov 18 11:25:32 2009 +0530 +++ b/usr/src/uts/common/sys/scsi/adapters/pmcs/pmcs_proto.h Wed Nov 18 07:29:02 2009 -0700 @@ -305,10 +305,9 @@ void pmcs_flush_target_queues(pmcs_hw_t *, pmcs_xscsi_t *, uint8_t); boolean_t pmcs_iport_has_targets(pmcs_hw_t *, pmcs_iport_t *); void pmcs_free_dma_chunklist(pmcs_hw_t *); -int pmcs_get_dev_state(pmcs_hw_t *, pmcs_xscsi_t *, uint8_t *); -int pmcs_set_dev_state(pmcs_hw_t *, pmcs_xscsi_t *, uint8_t); void pmcs_dev_state_recovery(pmcs_hw_t *, pmcs_phy_t *); -int pmcs_send_err_recovery_cmd(pmcs_hw_t *, uint8_t, pmcs_xscsi_t *); +int pmcs_send_err_recovery_cmd(pmcs_hw_t *, uint8_t, pmcs_phy_t *, + pmcs_xscsi_t *); void pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb, size_t amt); void pmcs_ssp_event_recovery(pmcs_hw_t *);