changeset 6713:87563ee4487b

6700930 Glendale panics during "Configuring /Dev" during boot net with FW 7.1.0.g
6702987 tx_ring lock is acquired ahead of the serializer after 6405398
author speer
date Mon, 26 May 2008 20:46:17 -0700
parents 79afecec3f3c
children 20dfd2e95b2f
files usr/src/uts/common/io/nxge/nxge_hio.c usr/src/uts/common/io/nxge/nxge_send.c usr/src/uts/common/io/nxge/nxge_txdma.c usr/src/uts/common/sys/nxge/nxge_txdma.h
diffstat 4 files changed, 96 insertions(+), 67 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/nxge/nxge_hio.c	Mon May 26 17:53:26 2008 -0700
+++ b/usr/src/uts/common/io/nxge/nxge_hio.c	Mon May 26 20:46:17 2008 -0700
@@ -900,7 +900,8 @@
 		nxge->nxge_hw_p->hio = (uintptr_t)nhd;
 	}
 
-	if (nxge->environs == SOLARIS_DOMAIN) {
+	if ((nxge->environs == SOLARIS_DOMAIN) &&
+	    (nxge->niu_type == N2_NIU)) {
 		if (nxge->niu_hsvc_available == B_TRUE) {
 			hsvc_info_t *niu_hsvc = &nxge->niu_hsvc;
 			if (niu_hsvc->hsvc_major == 1 &&
@@ -1067,8 +1068,8 @@
 	fp = &nhd->hio.vr;
 	if ((hv_rv = (*fp->assign)(vr->region, cookie, &vr->cookie))) {
 		NXGE_ERROR_MSG((nxge, HIO_CTL,
-			"nx_hio_share_assign: "
-			"vr->assign() returned %d", hv_rv));
+		    "nx_hio_share_assign: "
+		    "vr->assign() returned %d", hv_rv));
 		nxge_hio_unshare((vr_handle_t)vr);
 		return (-EIO);
 	}
@@ -1083,7 +1084,6 @@
 		while (dc) {
 			hv_rv = (*tx->assign)
 			    (vr->cookie, dc->channel, &slot);
-cmn_err(CE_CONT, "tx->assign(%d, %d)", dc->channel, dc->page);
 			if (hv_rv != 0) {
 				NXGE_ERROR_MSG((nxge, NXGE_ERR_CTL,
 				    "nx_hio_share_assign: "
@@ -1112,7 +1112,6 @@
 		while (dc) {
 			hv_rv = (*rx->assign)
 			    (vr->cookie, dc->channel, &slot);
-cmn_err(CE_CONT, "rx->assign(%d, %d)", dc->channel, dc->page);
 			if (hv_rv != 0) {
 				NXGE_ERROR_MSG((nxge, NXGE_ERR_CTL,
 				    "nx_hio_share_assign: "
@@ -1131,7 +1130,6 @@
 		}
 	}
 
-	cmn_err(CE_CONT, "tmap %lx, rmap %lx", *tmap, *rmap);
 	return (0);
 }
 
@@ -1537,6 +1535,7 @@
 {
 	nxge_grp_set_t *set = &nxge->tx_set;
 	tx_ring_t *ring;
+	int count;
 
 	NXGE_DEBUG_MSG((nxge, HIO_CTL, "==> nxge_hio_tdc_share"));
 
@@ -1544,37 +1543,26 @@
 	 * Wait until this channel is idle.
 	 */
 	ring = nxge->tx_rings->rings[channel];
-	MUTEX_ENTER(&ring->lock);
-	switch (ring->tx_ring_state) {
-		int count;
-	case TX_RING_STATE_OFFLINE:
-		break;
-	case TX_RING_STATE_IDLE:
-		ring->tx_ring_state = TX_RING_STATE_OFFLINE;
-		break;
-	case TX_RING_STATE_BUSY:
-		/* 30 seconds */
-		for (count = 30 * 1000; count; count--) {
-			MUTEX_EXIT(&ring->lock);
-			drv_usecwait(1000); /* 1 millisecond */
-			MUTEX_ENTER(&ring->lock);
-			if (ring->tx_ring_state == TX_RING_STATE_IDLE) {
-				ring->tx_ring_state = TX_RING_STATE_OFFLINE;
-				break;
-			}
+
+	/*
+	 * Wait up to 30 seconds for the ring to be marked OFFLINED.
+	 */
+	(void) atomic_swap_32(&ring->tx_ring_offline, NXGE_TX_RING_OFFLINING);
+	for (count = 30 * 1000; count; count--) {
+		if (ring->tx_ring_offline & NXGE_TX_RING_OFFLINED) {
+			break;
 		}
-		if (count == 0) {
-			MUTEX_EXIT(&ring->lock);
-			NXGE_ERROR_MSG((nxge, NXGE_ERR_CTL, "nx_hio_tdc_share: "
-			    "Tx ring %d was always BUSY", channel));
-			return (-EIO);
-		}
-		break;
-	default:
-		MUTEX_EXIT(&ring->lock);
+
+		drv_usecwait(1000);
+	}
+
+	if (count == 0) {
+		(void) atomic_swap_32(&ring->tx_ring_offline,
+		    NXGE_TX_RING_ONLINE);
+		NXGE_ERROR_MSG((nxge, NXGE_ERR_CTL, "nx_hio_tdc_share: "
+		    "Tx ring %d was always BUSY", channel));
 		return (-EIO);
 	}
-	MUTEX_EXIT(&ring->lock);
 
 	if (nxge_intr_remove(nxge, VP_BOUND_TX, channel) != NXGE_OK) {
 		NXGE_ERROR_MSG((nxge, NXGE_ERR_CTL, "nx_hio_tdc_share: "
--- a/usr/src/uts/common/io/nxge/nxge_send.c	Mon May 26 17:53:26 2008 -0700
+++ b/usr/src/uts/common/io/nxge/nxge_send.c	Mon May 26 20:46:17 2008 -0700
@@ -233,6 +233,15 @@
 	lso_ngathers = 0;
 
 	MUTEX_ENTER(&tx_ring_p->lock);
+
+	if (isLDOMservice(nxgep)) {
+		if (tx_ring_p->tx_ring_offline) {
+			freemsg(mp);
+			MUTEX_EXIT(&tx_ring_p->lock);
+			return (status);
+		}
+	}
+
 	cur_index_lso = tx_ring_p->wr_index;
 	lso_tail_wrap = tx_ring_p->wr_index_wrap;
 start_again:
@@ -284,10 +293,21 @@
 			    tx_ring_p->tdc));
 			goto nxge_start_fail_lso;
 		} else {
+			boolean_t skip_sched = B_FALSE;
+
 			cas32((uint32_t *)&tx_ring_p->queueing, 0, 1);
 			tdc_stats->tx_no_desc++;
+			if (isLDOMservice(nxgep) &&
+				tx_ring_p->tx_ring_offline) {
+				(void) atomic_swap_32(
+				    &tx_ring_p->tx_ring_offline,
+				    NXGE_TX_RING_OFFLINED);
+				skip_sched = B_TRUE;
+			}
+
 			MUTEX_EXIT(&tx_ring_p->lock);
-			if (nxgep->resched_needed && !nxgep->resched_running) {
+			if (nxgep->resched_needed &&
+			    !nxgep->resched_running && !skip_sched) {
 				nxgep->resched_running = B_TRUE;
 				ddi_trigger_softintr(nxgep->resched_id);
 			}
@@ -965,7 +985,9 @@
 	tx_ring_p->descs_pending += ngathers;
 	tdc_stats->tx_starts++;
 
-	tx_ring_p->tx_ring_state = TX_RING_STATE_IDLE;
+	if (isLDOMservice(nxgep) && tx_ring_p->tx_ring_offline)
+		(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
+		    NXGE_TX_RING_OFFLINED);
 
 	MUTEX_EXIT(&tx_ring_p->lock);
 
@@ -983,6 +1005,9 @@
 		freemsg(mp_chain);
 	}
 	if (!lso_again && !ngathers) {
+		if (isLDOMservice(nxgep) && tx_ring_p->tx_ring_offline)
+			(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
+			    NXGE_TX_RING_OFFLINED);
 		MUTEX_EXIT(&tx_ring_p->lock);
 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
 		    "==> nxge_start: lso exit (nothing changed)"));
@@ -1055,7 +1080,10 @@
 		nxgep->resched_needed = B_TRUE;
 	}
 
-	tx_ring_p->tx_ring_state = TX_RING_STATE_IDLE;
+
+	if (isLDOMservice(nxgep) && tx_ring_p->tx_ring_offline)
+		(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
+		    NXGE_TX_RING_OFFLINED);
 
 	MUTEX_EXIT(&tx_ring_p->lock);
 
@@ -1072,8 +1100,18 @@
 {
 	p_tx_ring_t		tx_ring_p = (p_tx_ring_t)arg;
 	p_nxge_t		nxgep = tx_ring_p->nxgep;
+	int			status = 0;
 
-	return (nxge_start(nxgep, tx_ring_p, mp));
+	if (isLDOMservice(nxgep)) {
+		if (tx_ring_p->tx_ring_offline) {
+			freemsg(mp);
+			return (status);
+		}
+	}
+
+	status = nxge_start(nxgep, tx_ring_p, mp);
+
+	return (status);
 }
 
 boolean_t
@@ -1094,29 +1132,19 @@
 	tx_rings = nxgep->tx_rings->rings;
 	tx_ring_p = tx_rings[group->legend[ring_index]];
 
-	MUTEX_ENTER(&tx_ring_p->lock);
-	if (tx_ring_p->tx_ring_state == TX_RING_STATE_OFFLINE) {
-		/*
-		 * OFFLINE means that it is in the process of being
-		 * shared - that is, it has been claimed by the HIO
-		 * code, but hasn't been unlinked from <group> yet.
-		 * So in this case use the first TDC, which always
-		 * belongs to the service domain and can't be shared.
-		 */
-		MUTEX_EXIT(&tx_ring_p->lock);
-
-		ring_index = 0;
-		tx_ring_p = tx_rings[group->legend[ring_index]];
-		MUTEX_ENTER(&tx_ring_p->lock);
-		tx_ring_p->tx_ring_state = TX_RING_STATE_BUSY;
-	} else {
-		/*
-		 * Otherwise, mark the TDC as BUSY: the HIO code
-		 * will wait until nxge_start() has completed.
-		 */
-		tx_ring_p->tx_ring_state = TX_RING_STATE_BUSY;
+	if (isLDOMservice(nxgep)) {
+		if (tx_ring_p->tx_ring_offline) {
+			/*
+			 * OFFLINE means that it is in the process of being
+			 * shared - that is, it has been claimed by the HIO
+			 * code, but hasn't been unlinked from <group> yet.
+			 * So in this case use the first TDC, which always
+			 * belongs to the service domain and can't be shared.
+			 */
+			ring_index = 0;
+			tx_ring_p = tx_rings[group->legend[ring_index]];
+		}
 	}
-	MUTEX_EXIT(&tx_ring_p->lock);
 
 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "count %d, tx_rings[%d] = %p",
 		(int)group->count, group->legend[ring_index], tx_ring_p));
--- a/usr/src/uts/common/io/nxge/nxge_txdma.c	Mon May 26 17:53:26 2008 -0700
+++ b/usr/src/uts/common/io/nxge/nxge_txdma.c	Mon May 26 20:46:17 2008 -0700
@@ -1126,6 +1126,14 @@
 	NXGE_DEBUG_MSG((nxgep, INT_CTL,
 		"==> nxge_tx_intr: nxgep(arg2) $%p ldvp(arg1) $%p",
 		nxgep, ldvp));
+
+	if ((!(nxgep->drv_state & STATE_HW_INITIALIZED)) ||
+	    (nxgep->nxge_mac_state != NXGE_MAC_STARTED)) {
+		NXGE_DEBUG_MSG((nxgep, INT_CTL,
+		    "<== nxge_tx_intr: interface not started or intialized"));
+		return (DDI_INTR_CLAIMED);
+	}
+
 	/*
 	 * This interrupt handler is for a specific
 	 * transmit dma channel.
@@ -1627,6 +1635,11 @@
 {
 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_check_tx_hang"));
 
+	if ((!(nxgep->drv_state & STATE_HW_INITIALIZED)) ||
+	    (nxgep->nxge_mac_state != NXGE_MAC_STARTED)) {
+		goto nxge_check_tx_hang_exit;
+	}
+
 	/*
 	 * Needs inputs from hardware for regs:
 	 *	head index had not moved since last timeout.
@@ -1635,6 +1648,8 @@
 	if (nxge_txdma_hung(nxgep)) {
 		nxge_fixup_hung_txdma_rings(nxgep);
 	}
+
+nxge_check_tx_hang_exit:
 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_check_tx_hang"));
 }
 
@@ -2511,6 +2526,7 @@
 	MUTEX_INIT(&tx_ring_p->lock, NULL, MUTEX_DRIVER,
 		(void *)nxgep->interrupt_cookie);
 
+	(void) atomic_swap_32(&tx_ring_p->tx_ring_offline, NXGE_TX_RING_ONLINE);
 	tx_ring_p->nxgep = nxgep;
 	tx_ring_p->serial = nxge_serialize_create(nmsgs,
 				nxge_serial_tx, tx_ring_p);
--- a/usr/src/uts/common/sys/nxge/nxge_txdma.h	Mon May 26 17:53:26 2008 -0700
+++ b/usr/src/uts/common/sys/nxge/nxge_txdma.h	Mon May 26 20:46:17 2008 -0700
@@ -124,12 +124,6 @@
 	txdma_ring_errlog_t	errlog;
 } nxge_tx_ring_stats_t, *p_nxge_tx_ring_stats_t;
 
-typedef enum {
-	TX_RING_STATE_IDLE,
-	TX_RING_STATE_BUSY,
-	TX_RING_STATE_OFFLINE
-} nxge_tx_state_t;
-
 typedef struct _tx_ring_t {
 	nxge_os_dma_common_t	tdc_desc;
 	struct _nxge_t		*nxgep;
@@ -152,7 +146,10 @@
 	log_page_hdl_t		page_hdl;
 	txc_dma_max_burst_t	max_burst;
 	boolean_t		cfg_set;
-	nxge_tx_state_t		tx_ring_state;
+#define	NXGE_TX_RING_ONLINE	0x00
+#define	NXGE_TX_RING_OFFLINING	0x01
+#define	NXGE_TX_RING_OFFLINED	0x02
+	uint32_t		tx_ring_offline;
 
 	nxge_os_mutex_t		lock;
 	uint16_t 		index;