changeset 12962:ace49e15c6f7

FWARC/2010/295 Update to the vNet/vSwitch Rx Dring Data mode
6971015 transmit race condition in RxDringData mode causes channel hang
author WENTAO YANG <Wentao.Yang@Sun.COM>
date Wed, 28 Jul 2010 17:32:50 -0700
parents b521d551715f
children 58d8b4191f65
files usr/src/uts/sun4v/io/vnet_rxdring.c usr/src/uts/sun4v/io/vsw_rxdring.c usr/src/uts/sun4v/sys/vio_mailbox.h usr/src/uts/sun4v/sys/vsw_ldc.h
diffstat 4 files changed, 24 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/vnet_rxdring.c	Wed Jul 28 16:57:31 2010 -0700
+++ b/usr/src/uts/sun4v/io/vnet_rxdring.c	Wed Jul 28 17:32:50 2010 -0700
@@ -402,7 +402,7 @@
 	ldcp->num_txds = num_desc;
 
 	/* Initialize tx dring indexes and seqnum */
-	ldcp->next_txi = ldcp->cur_txi = 0;
+	ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
 	ldcp->next_txseq = VNET_ISS - 1;
 	ldcp->resched_peer = B_TRUE;
 	ldcp->dring_mtype = minfo.mtype;
@@ -493,7 +493,7 @@
 	/* clobber tx ring members */
 	bzero(&ldcp->tx_dring_cookie, sizeof (ldcp->tx_dring_cookie));
 	ldcp->mtxdp = NULL;
-	ldcp->next_txi = ldcp->cur_txi = 0;
+	ldcp->next_txi = ldcp->cur_txi = ldcp->resched_peer_txi = 0;
 	ldcp->num_txds = 0;
 	ldcp->next_txseq = VNET_ISS - 1;
 	ldcp->resched_peer = B_TRUE;
@@ -567,7 +567,6 @@
 	uint32_t			next_txi;
 	uint32_t			txi;
 	vnet_rx_dringdata_desc_t	*txdp;
-	vnet_rx_dringdata_desc_t	*ntxdp;
 	struct ether_header		*ehp;
 	size_t				mblksz;
 	caddr_t				dst;
@@ -644,8 +643,8 @@
 	mutex_enter(&ldcp->txlock);
 	txi = next_txi = ldcp->next_txi;
 	INCR_TXI(next_txi, ldcp);
-	ntxdp = &(ldcp->mtxdp[next_txi]);
-	if (ntxdp->dstate != VIO_DESC_DONE) { /* out of descriptors */
+	txdp = &(ldcp->mtxdp[txi]);
+	if (txdp->dstate != VIO_DESC_DONE) { /* out of descriptors */
 		if (ldcp->tx_blocked == B_FALSE) {
 			ldcp->tx_blocked_lbolt = ddi_get_lbolt();
 			ldcp->tx_blocked = B_TRUE;
@@ -654,6 +653,8 @@
 		mutex_exit(&ldcp->txlock);
 		(void) LDC_NO_TRAP();
 		return (VGEN_TX_NORESOURCES);
+	} else {
+		txdp->dstate = VIO_DESC_INITIALIZING;
 	}
 
 	if (ldcp->tx_blocked == B_TRUE) {
@@ -672,9 +673,6 @@
 		vtx_update(ldcp->portp->vhp);
 	}
 
-	/* Access the descriptor */
-	txdp = &(ldcp->mtxdp[txi]);
-
 	/* Ensure load ordering of dstate (above) and data_buf_offset. */
 	MEMBAR_CONSUMER();
 
@@ -702,13 +700,15 @@
 
 	mutex_enter(&ldcp->wrlock);
 
+	ASSERT(txdp->dstate == VIO_DESC_INITIALIZING);
+
 	/* Mark the descriptor ready */
 	txdp->dstate = VIO_DESC_READY;
 
 	/* Check if peer needs wake up (handled below) */
-	if (ldcp->resched_peer == B_TRUE) {
+	if (ldcp->resched_peer == B_TRUE && ldcp->resched_peer_txi == txi) {
+		resched_peer = B_TRUE;
 		ldcp->resched_peer = B_FALSE;
-		resched_peer = B_TRUE;
 	}
 
 	/* Update tx stats */
@@ -1270,6 +1270,7 @@
 		 * the peer when tx descriptors are ready in transmit routine.
 		 */
 		ldcp->resched_peer = B_TRUE;
+		ldcp->resched_peer_txi = txi;
 		mutex_exit(&ldcp->wrlock);
 		return (rv);
 	}
--- a/usr/src/uts/sun4v/io/vsw_rxdring.c	Wed Jul 28 16:57:31 2010 -0700
+++ b/usr/src/uts/sun4v/io/vsw_rxdring.c	Wed Jul 28 17:32:50 2010 -0700
@@ -456,7 +456,7 @@
 	/* RxDringData mode specific initializations */
 	mutex_init(&dp->txlock, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
-	dp->next_txi = 0;
+	dp->next_txi = dp->restart_peer_txi = 0;
 	dp->restart_reqd = B_TRUE;
 	ldcp->dringdata_msgid = 0;
 	ldcp->lane_in.dringp = dp;
@@ -857,7 +857,6 @@
 	uint32_t			next_txi;
 	uint32_t			txi;
 	vnet_rx_dringdata_desc_t	*txdp;
-	vnet_rx_dringdata_desc_t	*ntxdp;
 	struct ether_header		*ehp;
 	size_t				mblksz;
 	caddr_t				dst;
@@ -951,21 +950,20 @@
 	mutex_enter(&dp->txlock);
 	txi = next_txi = dp->next_txi;
 	INCR_TXI(dp, next_txi);
-	ntxdp = &(pub_addr[next_txi]);
-	if (ntxdp->dstate != VIO_DESC_DONE) { /* out of descriptors */
+	txdp = &(pub_addr[txi]);
+	if (txdp->dstate != VIO_DESC_DONE) { /* out of descriptors */
 		statsp->tx_no_desc++;
 		mutex_exit(&dp->txlock);
 		(void) LDC_NO_TRAP();
 		return (LDC_TX_NORESOURCES);
+	} else {
+		txdp->dstate = VIO_DESC_INITIALIZING;
 	}
 
 	/* Update descriptor ring index */
 	dp->next_txi = next_txi;
 	mutex_exit(&dp->txlock);
 
-	/* Access the descriptor */
-	txdp = &(pub_addr[txi]);
-
 	/* Ensure load ordering of dstate (above) and data_buf_offset. */
 	MEMBAR_CONSUMER();
 
@@ -993,11 +991,13 @@
 
 	mutex_enter(&dp->restart_lock);
 
+	ASSERT(txdp->dstate == VIO_DESC_INITIALIZING);
+
 	/* Mark the descriptor ready */
 	txdp->dstate = VIO_DESC_READY;
 
 	/* Check if peer needs wake up (handled below) */
-	if (dp->restart_reqd == B_TRUE) {
+	if (dp->restart_reqd == B_TRUE && dp->restart_peer_txi == txi) {
 		dp->restart_reqd = B_FALSE;
 		resched_peer = B_TRUE;
 	}
@@ -1208,6 +1208,7 @@
 		 * the peer when tx descriptors are ready in transmit routine.
 		 */
 		dp->restart_reqd = B_TRUE;
+		dp->restart_peer_txi = txi;
 		mutex_exit(&dp->restart_lock);
 		return;
 	}
--- a/usr/src/uts/sun4v/sys/vio_mailbox.h	Wed Jul 28 16:57:31 2010 -0700
+++ b/usr/src/uts/sun4v/sys/vio_mailbox.h	Wed Jul 28 17:32:50 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef _SYS_VIO_MAILBOX_H
@@ -339,6 +338,7 @@
 #define	VIO_DESC_READY		0x2
 #define	VIO_DESC_ACCEPTED	0x3
 #define	VIO_DESC_DONE		0x4
+#define	VIO_DESC_INITIALIZING	0x5
 #define	VIO_DESC_MASK		0xf
 
 /* Macro to populate the generic fields of the DRing data msg */
--- a/usr/src/uts/sun4v/sys/vsw_ldc.h	Wed Jul 28 16:57:31 2010 -0700
+++ b/usr/src/uts/sun4v/sys/vsw_ldc.h	Wed Jul 28 17:32:50 2010 -0700
@@ -20,8 +20,7 @@
  */
 
 /*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
@@ -267,6 +266,7 @@
 	uint32_t		next_rxi;	/* next expected recv index */
 	kmutex_t		restart_lock;	/* protect restart_reqd */
 	boolean_t		restart_reqd;	/* send restart msg */
+	uint32_t		restart_peer_txi; /* index to restart peer */
 	void			*pub_addr;	/* base of public section */
 	void			*priv_addr;	/* base of private section */
 	void			*data_addr;	/* base of data section */