changeset 12874:b8f151378367

6956485 format command hangs after a few chassis poweroff/on operations on expansion chassis configuration
6957428 Max-maguro+SAS2: System crash while other head was upgrading from Q1.1.0 to Q1.2.0
author Jesse Butler <jesse.butler@oracle.com>
date Mon, 19 Jul 2010 14:55:32 -0600
parents 451918cc0343
children 5783b6fe7ed5
files usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c
diffstat 3 files changed, 49 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c	Mon Jul 19 13:20:31 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_ds.c	Mon Jul 19 14:55:32 2010 -0600
@@ -599,7 +599,7 @@
  * it involves sending multiple commands to device and we should not do it
  * in the interrupt context.
  * If it is failure of a recovery command, let the recovery thread deal with it.
- * Called with pmcwork lock held.
+ * Called with the work lock held.
  */
 void
 pmcs_start_ssp_event_recovery(pmcs_hw_t *pwp, pmcwork_t *pwrk, uint32_t *iomb,
@@ -642,7 +642,10 @@
 		RESTART_DISCOVERY(pwp);
 		return;
 	} else {
+		/* We have a phy pointer, we'll need to lock it */
+		mutex_exit(&pwrk->lock);
 		pmcs_lock_phy(pptr);
+		mutex_enter(&pwrk->lock);
 		if (tgt != NULL) {
 			mutex_enter(&tgt->statlock);
 		}
@@ -659,8 +662,8 @@
 			if (tgt != NULL) {
 				mutex_exit(&tgt->statlock);
 			}
+			mutex_exit(&pwrk->lock);
 			pmcs_unlock_phy(pptr);
-			mutex_exit(&pwrk->lock);
 			SCHEDULE_WORK(pwp, PMCS_WORK_ABORT_HANDLE);
 			RESTART_DISCOVERY(pwp);
 			return;
@@ -681,8 +684,8 @@
 			if (tgt != NULL) {
 				mutex_exit(&tgt->statlock);
 			}
+			mutex_exit(&pwrk->lock);
 			pmcs_unlock_phy(pptr);
-			mutex_exit(&pwrk->lock); /* XXX: Is this right??? */
 			return;
 		}
 
@@ -691,6 +694,8 @@
 			    "%s: Not scheduling SSP event recovery for NULL tgt"
 			    " pwrk(%p) tag(0x%x)", __func__, (void *)pwrk,
 			    pwrk->htag);
+			mutex_exit(&pwrk->lock);
+			pmcs_unlock_phy(pptr);
 			return;
 		}
 
@@ -706,6 +711,7 @@
 			    __func__, pwrk->htag);
 
 			mutex_exit(&tgt->statlock);
+			/* Note: work remains locked for the callback */
 			pmcs_unlock_phy(pptr);
 			pwrk->ssp_event = event;
 			callback = (pmcs_cb_t)pwrk->ptr;
@@ -719,13 +725,13 @@
 		 */
 		tgt->event_recovery = 1;
 		mutex_exit(&tgt->statlock);
+		pwrk->ssp_event = event;
+		mutex_exit(&pwrk->lock);
 		pmcs_unlock_phy(pptr);
-		pwrk->ssp_event = event;
 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, tgt,
 		    "%s: Scheduling SSP event recovery for tgt(0x%p) "
 		    "pwrk(%p) tag(0x%x)", __func__, (void *)tgt, (void *)pwrk,
 		    pwrk->htag);
-		mutex_exit(&pwrk->lock);
 		SCHEDULE_WORK(pwp, PMCS_WORK_SSP_EVT_RECOVERY);
 	}
 
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c	Mon Jul 19 13:20:31 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_scsa.c	Mon Jul 19 14:55:32 2010 -0600
@@ -1120,10 +1120,7 @@
 	pmcs_unlock_phy(pptr);
 	WAIT_FOR(pwrk, smp_pkt->smp_pkt_timeout * 1000, result);
 	pmcs_pwork(pwp, pwrk);
-	pmcs_smp_release(iport);
-	pmcs_rele_iport(iport);
 	pmcs_lock_phy(pptr);
-
 	if (result) {
 		pmcs_timed_out(pwp, htag, __func__);
 		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
@@ -1135,11 +1132,15 @@
 			    "%s: Issuing SMP ABORT for htag 0x%08x",
 			    __func__, htag);
 		}
+		pmcs_smp_release(iport);
+		pmcs_rele_iport(iport);
 		pmcs_unlock_phy(pptr);
 		pmcs_release_scratch(pwp);
 		smp_pkt->smp_pkt_reason = ETIMEDOUT;
 		return (DDI_FAILURE);
 	}
+	pmcs_smp_release(iport);
+	pmcs_rele_iport(iport);
 	status = LE_32(msg[2]);
 	if (status == PMCOUT_STATUS_OVERFLOW) {
 		status = PMCOUT_STATUS_OK;
--- a/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c	Mon Jul 19 13:20:31 2010 -0700
+++ b/usr/src/uts/common/io/scsi/adapters/pmcs/pmcs_subr.c	Mon Jul 19 14:55:32 2010 -0600
@@ -846,10 +846,7 @@
 	pmcs_unlock_phy(pptr);
 	WAIT_FOR(pwrk, 1000, result);
 	pmcs_pwork(pwp, pwrk);
-	pmcs_smp_release(iport);
-	pmcs_rele_iport(iport);
 	pmcs_lock_phy(pptr);
-
 	if (result) {
 		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);
 
@@ -862,8 +859,12 @@
 			    "%s: Issuing SMP ABORT for htag 0x%08x",
 			    __func__, htag);
 		}
+		pmcs_smp_release(iport);
+		pmcs_rele_iport(iport);
 		return (EIO);
 	}
+	pmcs_smp_release(iport);
+	pmcs_rele_iport(iport);
 	status = LE_32(iomb[stsoff]);
 
 	if (status != PMCOUT_STATUS_OK) {
@@ -4218,10 +4219,23 @@
 	pmcs_unlock_phy(pptr);
 	WAIT_FOR(pwrk, 1000, result);
 	pmcs_pwork(pwp, pwrk);
+	pmcs_lock_phy(pptr);
+	if (result) {
+		pmcs_timed_out(pwp, htag, __func__);
+		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
+		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
+		if (pmcs_abort(pwp, pptr, htag, 0, 1)) {
+			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
+			    "%s: SMP ABORT failed for cmd (htag 0x%08x)",
+			    __func__, htag);
+		}
+		pmcs_smp_release(iport);
+		pmcs_rele_iport(iport);
+		result = 0;
+		goto out;
+	}
 	pmcs_smp_release(iport);
 	pmcs_rele_iport(iport);
-	pmcs_lock_phy(pptr);
-
 
 	mutex_enter(&pwp->config_lock);
 	if (pwp->config_changed) {
@@ -4232,22 +4246,6 @@
 	}
 	mutex_exit(&pwp->config_lock);
 
-	if (result) {
-		pmcs_timed_out(pwp, htag, __func__);
-		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
-		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
-		if (pmcs_abort(pwp, pptr, htag, 0, 0)) {
-			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
-			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
-			    __func__, htag);
-		} else {
-			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
-			    "%s: Issuing SMP ABORT for htag 0x%08x",
-			    __func__, htag);
-		}
-		result = 0;
-		goto out;
-	}
 	ptr = (void *)pwp->scratch;
 	status = LE_32(ptr[2]);
 	if (status == PMCOUT_STATUS_UNDERFLOW ||
@@ -4457,9 +4455,22 @@
 	pmcs_unlock_phy(expander);
 	WAIT_FOR(pwrk, 1000, result);
 	pmcs_pwork(pwp, pwrk);
+	pmcs_lock_phy(expander);
+	if (result) {
+		pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
+		    "%s: Issuing SMP ABORT for htag 0x%08x", __func__, htag);
+		if (pmcs_abort(pwp, pptr, htag, 0, 1)) {
+			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
+			    "%s: SMP ABORT failed for cmd (htag 0x%08x)",
+			    __func__, htag);
+		}
+		pmcs_smp_release(iport);
+		pmcs_rele_iport(iport);
+		result = -ETIMEDOUT;
+		goto out;
+	}
 	pmcs_smp_release(iport);
 	pmcs_rele_iport(iport);
-	pmcs_lock_phy(expander);
 
 	mutex_enter(&pwp->config_lock);
 	if (pwp->config_changed) {
@@ -4468,22 +4479,8 @@
 		result = 0;
 		goto out;
 	}
+
 	mutex_exit(&pwp->config_lock);
-
-	if (result) {
-		pmcs_prt(pwp, PMCS_PRT_DEBUG, pptr, NULL, pmcs_timeo, __func__);
-		if (pmcs_abort(pwp, expander, htag, 0, 0)) {
-			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
-			    "%s: Unable to issue SMP ABORT for htag 0x%08x",
-			    __func__, htag);
-		} else {
-			pmcs_prt(pwp, PMCS_PRT_DEBUG_CONFIG, pptr, NULL,
-			    "%s: Issuing SMP ABORT for htag 0x%08x",
-			    __func__, htag);
-		}
-		result = -ETIMEDOUT;
-		goto out;
-	}
 	ptr = (void *)pwp->scratch;
 	/*
 	 * Point roff to the DMA offset for returned data