changeset 3297:3409a5c16a1c

6464148 rename in-band descriptor in vnet to use the right prefix. 6496374 vsw: "turnstile_block: unowned mutex" panic on a diskless-clients test bed 6501505 Assertion panic in vcc on ldom creation. 6501588 Adding a 2nd VDS to domain0 renders the 1st VDS unreachable, plus false failures on VDS1 disk 6503173 a couple of minor memory leaks during detach() in vnet 6505181 handshake retry counts in vnet and vsw do not match
author sb155480
date Wed, 20 Dec 2006 14:40:54 -0800
parents 03d9a1ede329
children 1fb2668efa39
files usr/src/uts/sun4v/io/vcc.c usr/src/uts/sun4v/io/vds.c usr/src/uts/sun4v/io/vnet.c usr/src/uts/sun4v/io/vnet_gen.c usr/src/uts/sun4v/io/vsw.c usr/src/uts/sun4v/sys/vnet_common.h usr/src/uts/sun4v/sys/vsw.h
diffstat 7 files changed, 206 insertions(+), 86 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/sun4v/io/vcc.c	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/io/vcc.c	Wed Dec 20 14:40:54 2006 -0800
@@ -55,6 +55,9 @@
 #include <sys/mdeg.h>
 #include <sys/vcc_impl.h>
 
+#define	VCC_LDC_RETRIES		5
+#define	VCC_LDC_DELAY		1000 /* usec */
+
 /*
  * Function prototypes.
  */
@@ -456,12 +459,13 @@
 }
 
 /*  release a ldc channel */
-static int
+static void
 i_vcc_ldc_fini(vcc_port_t *vport)
 {
 	int 		rv = EIO;
 	vcc_msg_t	buf;
 	size_t		sz;
+	int		retry = 0;
 
 	D1("i_vcc_ldc_fini: port@%lld, ldc_id%%llx\n", vport->number,
 	    vport->ldc_id);
@@ -471,57 +475,76 @@
 	/* wait for write available */
 	rv = i_vcc_wait_port_status(vport, &vport->write_cv,
 	    VCC_PORT_USE_WRITE_LDC);
-	if (rv) {
-		return (rv);
+
+	if (rv == 0) {
+		vport->status &= ~VCC_PORT_USE_WRITE_LDC;
+
+		/* send a HUP message */
+		buf.type = LDC_CONSOLE_CTRL;
+		buf.ctrl_msg = LDC_CONSOLE_HUP;
+		buf.size = 0;
+
+		/*
+		 * ignore write error since we still want to clean up
+		 * ldc channel.
+		 */
+		(void) i_vcc_write_ldc(vport, &buf);
+
+		mutex_exit(&vport->lock);
+		i_vcc_set_port_status(vport, &vport->write_cv,
+		    VCC_PORT_USE_WRITE_LDC);
+		mutex_enter(&vport->lock);
 	}
-	vport->status &= ~VCC_PORT_USE_WRITE_LDC;
-	/* send a HUP message */
-	buf.type = LDC_CONSOLE_CTRL;
-	buf.ctrl_msg = LDC_CONSOLE_HUP;
-	buf.size = 0;
-
-	/* in case of error, we still want to clean up ldc channel */
-	(void) i_vcc_write_ldc(vport, &buf);
-
-	mutex_exit(&vport->lock);
-	i_vcc_set_port_status(vport, &vport->write_cv, VCC_PORT_USE_WRITE_LDC);
-	mutex_enter(&vport->lock);
 
 	/* flush ldc channel */
 	rv = i_vcc_wait_port_status(vport, &vport->read_cv,
 	    VCC_PORT_USE_READ_LDC);
-	if (rv) {
-		return (rv);
+
+	if (rv == 0) {
+		vport->status &= ~VCC_PORT_USE_READ_LDC;
+		do {
+			sz = sizeof (buf);
+			rv = i_vcc_read_ldc(vport, (char *)&buf, &sz);
+		} while (rv == 0 && sz > 0);
+
+		vport->status |= VCC_PORT_USE_READ_LDC;
+
 	}
 
-	vport->status &= ~VCC_PORT_USE_READ_LDC;
-	do {
-		sz = sizeof (buf);
-		rv = i_vcc_read_ldc(vport, (char *)&buf, &sz);
-	} while (rv == 0 && sz > 0);
-
-	vport->status |= VCC_PORT_USE_READ_LDC;
+	/*
+	 * ignore read error since we still want to clean up
+	 * ldc channel.
+	 */
 
 	(void) ldc_set_cb_mode(vport->ldc_handle, LDC_CB_DISABLE);
-	if ((rv = ldc_close(vport->ldc_handle)) != 0) {
-		cmn_err(CE_CONT, "i_vcc_ldc_fini: cannot close channel %ld\n",
-		    vport->ldc_id);
-		return (rv);
+
+	/* close LDC channel - retry on EAGAIN */
+	while ((rv = ldc_close(vport->ldc_handle)) == EAGAIN) {
+
+		if (++retry > VCC_LDC_RETRIES) {
+			cmn_err(CE_CONT, "i_vcc_ldc_fini: cannot close channel"
+			    " %ld\n", vport->ldc_id);
+			break;
+		}
+
+		drv_usecwait(VCC_LDC_DELAY);
 	}
 
-	if ((rv = ldc_unreg_callback(vport->ldc_handle)) != 0) {
-		cmn_err(CE_CONT, "i_vcc_ldc_fini: port@%d ldc_unreg_callback"
-			"failed\n", vport->number);
-		return (rv);
+	if (rv == 0) {
+		(void) ldc_unreg_callback(vport->ldc_handle);
+		(void) ldc_fini(vport->ldc_handle);
+	} else {
+		/*
+		 * Closing the LDC channel has failed. Ideally we should
+		 * fail here but there is no Zeus level infrastructure
+		 * to handle this. The MD has already been changed and
+		 * we have to do the close. So we try to do as much
+		 * clean up as we can.
+		 */
+		while (ldc_unreg_callback(vport->ldc_handle) == EAGAIN)
+			drv_usecwait(VCC_LDC_DELAY);
 	}
 
-	if ((rv = ldc_fini(vport->ldc_handle)) != 0) {
-		cmn_err(CE_CONT, "i_vcc_ldc_fini: cannot finilize channel"
-		    "%ld\n", vport->ldc_id);
-		return (rv);
-	}
-
-	return (0);
 }
 
 /* read data from ldc channel */
@@ -1160,6 +1183,15 @@
 		return (0);
 	}
 
+	/*
+	 * the port may have just been added by the mdeg callback and
+	 * may not be configured yet.
+	 */
+	if (vport->ldc_id == VCC_INVALID_CHANNEL) {
+		mutex_exit(&vport->lock);
+		return (ENXIO);
+	}
+
 
 	/* check if channel has been initialized */
 	if ((vport->status & VCC_PORT_LDC_CHANNEL_READY) == 0) {
@@ -1189,7 +1221,6 @@
 static int
 i_vcc_close_port(vcc_port_t *vport)
 {
-	int	rv = EIO;
 
 	if ((vport->status & VCC_PORT_OPEN) == 0) {
 		return (0);
@@ -1199,9 +1230,7 @@
 
 	if (vport->status & VCC_PORT_LDC_CHANNEL_READY) {
 		/* clean up ldc channel */
-		if ((rv = i_vcc_ldc_fini(vport)) != 0) {
-			return (rv);
-		}
+		i_vcc_ldc_fini(vport);
 		vport->status &= ~VCC_PORT_LDC_CHANNEL_READY;
 	}
 
@@ -1246,7 +1275,14 @@
 	vport = &(vccp->port[portno]);
 
 
+	/*
+	 * needs lock to prevent i_vcc_delete_port, which is called by
+	 * the mdeg callback, from closing port.
+	 */
+	mutex_enter(&vport->lock);
+
 	if ((vport->status & VCC_PORT_OPEN) == 0) {
+		mutex_exit(&vport->lock);
 		return (0);
 	}
 
@@ -1255,11 +1291,11 @@
 		 * vntsd closes control port before it exits. There
 		 * could be events still pending for vntsd.
 		 */
+		mutex_exit(&vport->lock);
 		rv = i_vcc_reset_events(vccp);
 		return (0);
 	}
 
-	mutex_enter(&vport->lock);
 
 	/* check minor no and pid */
 	if ((rv = i_vcc_can_use_port(VCCMINORP(vccp, minor),
--- a/usr/src/uts/sun4v/io/vds.c	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/io/vds.c	Wed Dec 20 14:40:54 2006 -0800
@@ -69,6 +69,7 @@
 #define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
 #define	VD_ID_PROP		"id"
 #define	VD_BLOCK_DEVICE_PROP	"vds-block-device"
+#define	VD_REG_PROP		"reg"
 
 /* Virtual disk initialization flags */
 #define	VD_LOCKING		0x01
@@ -110,6 +111,33 @@
 		(((vd)->xfer_mode == 0) ? "null client" :		\
 		    "unsupported client")))
 
+/*
+ * Specification of an MD node passed to the MDEG to filter any
+ * 'vport' nodes that do not belong to the specified node. This
+ * template is copied for each vds instance and filled in with
+ * the appropriate 'cfg-handle' value before being passed to the MDEG.
+ */
+static mdeg_prop_spec_t	vds_prop_template[] = {
+	{ MDET_PROP_STR,	"name",		VDS_NAME },
+	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
+	{ MDET_LIST_END,	NULL, 		NULL }
+};
+
+#define	VDS_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val);
+
+/*
+ * Matching criteria passed to the MDEG to register interest
+ * in changes to 'virtual-device-port' nodes identified by their
+ * 'id' property.
+ */
+static md_prop_match_t	vd_prop_match[] = {
+	{ MDET_PROP_VAL,	VD_ID_PROP },
+	{ MDET_LIST_END,	NULL }
+};
+
+static mdeg_node_match_t vd_match = {"virtual-device-port",
+				    vd_prop_match};
+
 /* Debugging macros */
 #ifdef DEBUG
 
@@ -221,6 +249,7 @@
 	dev_info_t	*dip;		/* driver inst devinfo pointer */
 	ldi_ident_t	ldi_ident;	/* driver's identifier for LDI */
 	mod_hash_t	*vd_table;	/* table of virtual disks served */
+	mdeg_node_spec_t *ispecp;	/* mdeg node specification */
 	mdeg_handle_t	mdeg;		/* handle for MDEG operations  */
 } vds_t;
 
@@ -2176,8 +2205,14 @@
 	}
 
 	PR0("Detaching");
-	if (vds->initialized & VDS_MDEG)
+	if (vds->initialized & VDS_MDEG) {
 		(void) mdeg_unregister(vds->mdeg);
+		kmem_free(vds->ispecp->specp, sizeof (vds_prop_template));
+		kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t));
+		vds->ispecp = NULL;
+		vds->mdeg = NULL;
+	}
+
 	if (vds->initialized & VDS_LDI)
 		(void) ldi_ident_release(vds->ldi_ident);
 	mod_hash_destroy_hash(vds->vd_table);
@@ -2876,27 +2911,12 @@
 static int
 vds_do_attach(dev_info_t *dip)
 {
-	static char	reg_prop[] = "reg";	/* devinfo ID prop */
-
-	/* MDEG specification for a (particular) vds node */
-	static mdeg_prop_spec_t	vds_prop_spec[] = {
-		{MDET_PROP_STR, "name", {VDS_NAME}},
-		{MDET_PROP_VAL, "cfg-handle", {0}},
-		{MDET_LIST_END, NULL, {0}}};
-	static mdeg_node_spec_t	vds_spec = {"virtual-device", vds_prop_spec};
-
-	/* MDEG specification for matching a vd node */
-	static md_prop_match_t	vd_prop_spec[] = {
-		{MDET_PROP_VAL, VD_ID_PROP},
-		{MDET_LIST_END, NULL}};
-	static mdeg_node_match_t vd_spec = {"virtual-device-port",
-					    vd_prop_spec};
-
-	int			status;
-	uint64_t		cfg_handle;
+	int			status, sz;
+	int			cfg_handle;
 	minor_t			instance = ddi_get_instance(dip);
 	vds_t			*vds;
-
+	mdeg_prop_spec_t	*pspecp;
+	mdeg_node_spec_t	*ispecp;
 
 	/*
 	 * The "cfg-handle" property of a vds node in an MD contains the MD's
@@ -2909,14 +2929,15 @@
 	 * property cannot be found, the device tree state is presumably so
 	 * broken that there is no point in continuing.
 	 */
-	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, reg_prop)) {
-		PRN("vds \"%s\" property does not exist", reg_prop);
+	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+		VD_REG_PROP)) {
+		PRN("vds \"%s\" property does not exist", VD_REG_PROP);
 		return (DDI_FAILURE);
 	}
 
 	/* Get the MD instance for later MDEG registration */
 	cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
-	    reg_prop, -1);
+	    VD_REG_PROP, -1);
 
 	if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
 		PRN("Could not allocate state for instance %u", instance);
@@ -2929,7 +2950,6 @@
 		return (DDI_FAILURE);
 	}
 
-
 	vds->dip	= dip;
 	vds->vd_table	= mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
 							vds_destroy_vd,
@@ -2943,12 +2963,26 @@
 	vds->initialized |= VDS_LDI;
 
 	/* Register for MD updates */
-	vds_prop_spec[1].ps_val = cfg_handle;
-	if (mdeg_register(&vds_spec, &vd_spec, vds_process_md, vds,
+	sz = sizeof (vds_prop_template);
+	pspecp = kmem_alloc(sz, KM_SLEEP);
+	bcopy(vds_prop_template, pspecp, sz);
+
+	VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle);
+
+	/* initialize the complete prop spec structure */
+	ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
+	ispecp->namep = "virtual-device";
+	ispecp->specp = pspecp;
+
+	if (mdeg_register(ispecp, &vd_match, vds_process_md, vds,
 		&vds->mdeg) != MDEG_SUCCESS) {
 		PRN("Unable to register for MD updates");
+		kmem_free(ispecp, sizeof (mdeg_node_spec_t));
+		kmem_free(pspecp, sz);
 		return (DDI_FAILURE);
 	}
+
+	vds->ispecp = ispecp;
 	vds->initialized |= VDS_MDEG;
 
 	/* Prevent auto-detaching so driver is available whenever MD changes */
--- a/usr/src/uts/sun4v/io/vnet.c	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/io/vnet.c	Wed Dec 20 14:40:54 2006 -0800
@@ -471,6 +471,9 @@
 	}
 	RW_EXIT(&vnet_rw);
 
+	kmem_free(vnetp->fdbhp,
+	    sizeof (fdb_fanout_t) * (vnetp->nfdb_hash + 1));
+
 	KMEM_FREE(vnetp);
 
 	return (DDI_SUCCESS);
--- a/usr/src/uts/sun4v/io/vnet_gen.c	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/io/vnet_gen.c	Wed Dec 20 14:40:54 2006 -0800
@@ -256,7 +256,7 @@
 
 /* Tunables */
 uint32_t vgen_hwd_interval = 1000;	/* handshake watchdog freq in msec */
-uint32_t vgen_max_hretries = 1;		/* max # of handshake retries */
+uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */
 uint32_t vgen_ldcwr_retries = 10;	/* max # of ldc_write() retries */
 uint32_t vgen_ldcup_retries = 5;	/* max # of ldc_up() retries */
 uint32_t vgen_recv_delay = 1;		/* delay when rx descr not ready */
@@ -1204,6 +1204,7 @@
 vgen_mdeg_unreg(vgen_t *vgenp)
 {
 	(void) mdeg_unregister(vgenp->mdeg_hdl);
+	kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template));
 	KMEM_FREE(vgenp->mdeg_parentp);
 	vgenp->mdeg_parentp = NULL;
 	vgenp->mdeg_hdl = NULL;
--- a/usr/src/uts/sun4v/io/vsw.c	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/io/vsw.c	Wed Dec 20 14:40:54 2006 -0800
@@ -229,7 +229,7 @@
 static void display_lane(lane_t *);
 static void display_ring(dring_info_t *);
 
-int	vsw_num_handshakes = 3;		/* # of handshake attempts */
+int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
 int	vsw_wretries = 100;		/* # of write attempts */
 int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
 int	vsw_desc_delay = 0;		/* delay in us */
@@ -3290,6 +3290,8 @@
 	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
 	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
 	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
+	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
+	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);
 
 	/* required for handshake with peer */
 	ldcp->local_session = (uint64_t)ddi_get_lbolt();
@@ -3353,6 +3355,9 @@
 
 	cv_destroy(&ldcp->drain_cv);
 
+	rw_destroy(&ldcp->lane_in.dlistrw);
+	rw_destroy(&ldcp->lane_out.dlistrw);
+
 	if (progress & PROG_callback) {
 		(void) ldc_unreg_callback(ldcp->ldc_handle);
 	}
@@ -3464,6 +3469,8 @@
 	mutex_destroy(&ldcp->lane_in.seq_lock);
 	mutex_destroy(&ldcp->lane_out.seq_lock);
 	mutex_destroy(&ldcp->status_lock);
+	rw_destroy(&ldcp->lane_in.dlistrw);
+	rw_destroy(&ldcp->lane_out.dlistrw);
 
 	kmem_free(ldcp, sizeof (vsw_ldc_t));
 
@@ -3869,6 +3876,7 @@
 			__func__, lstatus, ldcp->ldc_status);
 		if ((ldcp->ldc_status != lstatus) &&
 					(ldcp->ldc_status == LDC_UP)) {
+				ldcp->reset_active = 0;
 				vsw_restart_handshake(ldcp);
 		}
 
@@ -3921,6 +3929,7 @@
 		if ((ldcp->ldc_status == LDC_UP) && (lstatus != LDC_UP)) {
 			D2(vswp, "%s: channel %ld now UP, restarting "
 				"handshake", __func__, ldcp->ldc_id);
+			ldcp->reset_active = 0;
 			vsw_restart_handshake(ldcp);
 		}
 	}
@@ -3966,6 +3975,12 @@
 
 	D1(vswp, "%s: enter", __func__);
 
+	/*
+	 * Check if reset already in progress for this channel.
+	 */
+	if (ldstub((uint8_t *)&ldcp->reset_active))
+		return;
+
 	port = ldcp->ldc_port;
 	ldcl = &port->p_ldclist;
 
@@ -4103,6 +4118,7 @@
 	 * has dealt with it then we restart the handshake here.
 	 */
 	if ((lstatus != LDC_UP) && (ldcp->ldc_status == LDC_UP)) {
+		ldcp->reset_active = 0;
 		vsw_restart_handshake(ldcp);
 	}
 
@@ -4301,7 +4317,6 @@
 		}
 		break;
 
-
 	case VSW_MILESTONE2:
 		/*
 		 * If peer has indicated in its attribute message that
@@ -5541,8 +5556,10 @@
 	case VIO_SUBTYPE_INFO:
 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
 
+		READ_ENTER(&ldcp->lane_in.dlistrw);
 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
 				dring_pkt->dring_ident)) == NULL) {
+			RW_EXIT(&ldcp->lane_in.dlistrw);
 
 			DERR(vswp, "%s(%lld): unable to find dring from "
 				"ident 0x%llx", __func__, ldcp->ldc_id,
@@ -5569,6 +5586,7 @@
 
 			/* basic sanity check */
 			if (end > len) {
+				RW_EXIT(&ldcp->lane_in.dlistrw);
 				DERR(vswp, "%s(%lld): endpoint %lld outside "
 					"ring length %lld", __func__,
 					ldcp->ldc_id, end, len);
@@ -5577,6 +5595,7 @@
 				return;
 			}
 		} else {
+			RW_EXIT(&ldcp->lane_in.dlistrw);
 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
 				__func__, ldcp->ldc_id, end);
 			SND_DRING_NACK(ldcp, dring_pkt);
@@ -5587,6 +5606,7 @@
 vsw_recheck_desc:
 			if ((rv = ldc_mem_dring_acquire(dp->handle,
 							pos, pos)) != 0) {
+				RW_EXIT(&ldcp->lane_in.dlistrw);
 				DERR(vswp, "%s(%lld): unable to acquire "
 					"descriptor at pos %d: err %d",
 					__func__, pos, ldcp->ldc_id, rv);
@@ -5615,6 +5635,7 @@
 				}
 
 				/* bounded - error - so NACK back */
+				RW_EXIT(&ldcp->lane_in.dlistrw);
 				DERR(vswp, "%s(%lld): descriptor not READY "
 					"(%d)", __func__, ldcp->ldc_id,
 					pub_addr->hdr.dstate);
@@ -5780,6 +5801,7 @@
 				break;
 			}
 		}
+		RW_EXIT(&ldcp->lane_in.dlistrw);
 
 		/* send the chain of packets to be switched */
 		if (bp != NULL) {
@@ -5826,8 +5848,10 @@
 		 * Verify that the relevant descriptors are all
 		 * marked as DONE
 		 */
+		READ_ENTER(&ldcp->lane_out.dlistrw);
 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
 			dring_pkt->dring_ident)) == NULL) {
+			RW_EXIT(&ldcp->lane_out.dlistrw);
 			DERR(vswp, "%s: unknown ident in ACK", __func__);
 			return;
 		}
@@ -5891,6 +5915,7 @@
 						" 0x%llx not DONE (0x%lx)\n",
 						__func__, i, pub_addr,
 						pub_addr->hdr.dstate);
+					RW_EXIT(&ldcp->lane_out.dlistrw);
 					return;
 				}
 			}
@@ -5953,6 +5978,7 @@
 			}
 			mutex_exit(&dp->restart_lock);
 		}
+		RW_EXIT(&ldcp->lane_out.dlistrw);
 		break;
 
 	case VIO_SUBTYPE_NACK:
@@ -5992,11 +6018,6 @@
 	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
 }
 
-#define	SND_IBND_DESC_NACK(ldcp, pkt) \
-	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
-	pkt->tag.vio_sid = ldcp->local_session; \
-	vsw_send_msg(ldcp, (void *)pkt, sizeof (vio_ibnd_desc_t));
-
 /*
  * Process an in-band descriptor message (most likely from
  * OBP).
@@ -6004,7 +6025,7 @@
 static void
 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
 {
-	vio_ibnd_desc_t		*ibnd_desc;
+	vnet_ibnd_desc_t	*ibnd_desc;
 	dring_info_t		*dp = NULL;
 	vsw_private_desc_t	*priv_addr = NULL;
 	vsw_t			*vswp = ldcp->ldc_vswp;
@@ -6019,7 +6040,7 @@
 
 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
 
-	ibnd_desc = (vio_ibnd_desc_t *)pkt;
+	ibnd_desc = (vnet_ibnd_desc_t *)pkt;
 
 	switch (ibnd_desc->hdr.tag.vio_subtype) {
 	case VIO_SUBTYPE_INFO:
@@ -6083,7 +6104,7 @@
 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
 		vsw_send_msg(ldcp, (void *)ibnd_desc,
-				sizeof (vio_ibnd_desc_t));
+				sizeof (vnet_ibnd_desc_t));
 
 		/* send the packet to be switched */
 		vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
@@ -6808,7 +6829,9 @@
 	 * Note - using first ring only, this may change
 	 * in the future.
 	 */
+	READ_ENTER(&ldcp->lane_out.dlistrw);
 	if ((dp = ldcp->lane_out.dringp) == NULL) {
+		RW_EXIT(&ldcp->lane_out.dlistrw);
 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
 			" channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
 		freemsg(mp);
@@ -6817,6 +6840,7 @@
 
 	size = msgsize(mp);
 	if (size > (size_t)ETHERMAX) {
+		RW_EXIT(&ldcp->lane_out.dlistrw);
 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
 		    ldcp->ldc_id, size);
 		freemsg(mp);
@@ -6917,6 +6941,8 @@
 
 vsw_dringsend_free_exit:
 
+	RW_EXIT(&ldcp->lane_out.dlistrw);
+
 	/* free the message block */
 	freemsg(mp);
 
@@ -6931,7 +6957,7 @@
 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
 {
 	vsw_t			*vswp = ldcp->ldc_vswp;
-	vio_ibnd_desc_t		ibnd_msg;
+	vnet_ibnd_desc_t	ibnd_msg;
 	vsw_private_desc_t	*priv_desc = NULL;
 	dring_info_t		*dp = NULL;
 	size_t			n, size = 0;
@@ -6958,12 +6984,14 @@
 	 * only expect single dring to exist, which we use
 	 * as an internal buffer, rather than a transfer channel.
 	 */
+	READ_ENTER(&ldcp->lane_out.dlistrw);
 	if ((dp = ldcp->lane_out.dringp) == NULL) {
 		DERR(vswp, "%s(%lld): no dring for outbound lane",
 			__func__, ldcp->ldc_id);
 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
 			__func__, ldcp->ldc_id, ldcp->ldc_status,
 			ldcp->lane_out.lstate);
+		RW_EXIT(&ldcp->lane_out.dlistrw);
 		freemsg(mp);
 		return (LDC_TX_FAILURE);
 	}
@@ -7030,10 +7058,12 @@
 	ibnd_msg.ncookies = priv_desc->ncookies;
 	ibnd_msg.nbytes = size;
 
-	vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));
+	vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vnet_ibnd_desc_t));
 
 vsw_descrsend_free_exit:
 
+	RW_EXIT(&ldcp->lane_out.dlistrw);
+
 	/* free the allocated message blocks */
 	freemsg(mp);
 
@@ -7827,6 +7857,7 @@
 	 * Only ever create rings for outgoing lane. Link it onto
 	 * end of list.
 	 */
+	WRITE_ENTER(&ldcp->lane_out.dlistrw);
 	if (ldcp->lane_out.dringp == NULL) {
 		D2(vswp, "vsw_create_dring: adding first outbound ring");
 		ldcp->lane_out.dringp = dp;
@@ -7837,6 +7868,7 @@
 
 		tp->next = dp;
 	}
+	RW_EXIT(&ldcp->lane_out.dlistrw);
 
 	return (dp);
 
@@ -7907,6 +7939,7 @@
 	 * Only ever create rings for outgoing lane. Link it onto
 	 * end of list.
 	 */
+	WRITE_ENTER(&ldcp->lane_out.dlistrw);
 	if (ldcp->lane_out.dringp == NULL) {
 		D2(vswp, "%s: adding first outbound privring", __func__);
 		ldcp->lane_out.dringp = dp;
@@ -7917,6 +7950,7 @@
 
 		tp->next = dp;
 	}
+	RW_EXIT(&ldcp->lane_out.dlistrw);
 
 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
 }
@@ -8108,6 +8142,8 @@
 /*
  * Map from a dring identifier to the ring itself. Returns
  * pointer to ring or NULL if no match found.
+ *
+ * Should be called with dlistrw rwlock held as reader.
  */
 static dring_info_t *
 vsw_ident2dring(lane_t *lane, uint64_t ident)
@@ -8319,6 +8355,7 @@
 	mutex_exit(&lp->seq_lock);
 	if (lp->dringp) {
 		if (dir == INBOUND) {
+			WRITE_ENTER(&lp->dlistrw);
 			dp = lp->dringp;
 			while (dp != NULL) {
 				dpp = dp->next;
@@ -8327,12 +8364,15 @@
 				kmem_free(dp, sizeof (dring_info_t));
 				dp = dpp;
 			}
+			RW_EXIT(&lp->dlistrw);
 		} else {
 			/*
 			 * unbind, destroy exported dring, free dring struct
 			 */
+			WRITE_ENTER(&lp->dlistrw);
 			dp = lp->dringp;
 			rv = vsw_free_ring(dp);
+			RW_EXIT(&lp->dlistrw);
 		}
 		if (rv == 0) {
 			lp->dringp = NULL;
@@ -8344,6 +8384,8 @@
 
 /*
  * Free ring and all associated resources.
+ *
+ * Should be called with dlistrw rwlock held as writer.
  */
 static int
 vsw_free_ring(dring_info_t *dp)
--- a/usr/src/uts/sun4v/sys/vnet_common.h	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/sys/vnet_common.h	Wed Dec 20 14:40:54 2006 -0800
@@ -50,6 +50,8 @@
 
 #define	VNET_IPALIGN		6	/* padding for IP header alignment */
 
+#define	VNET_NUM_HANDSHAKES	3	/* # of handshake attempts */
+
 /* vnet descriptor */
 typedef struct vnet_public_desc {
 	vio_dring_entry_hdr_t	hdr;		/* descriptor header */
@@ -59,17 +61,17 @@
 } vnet_public_desc_t;
 
 /*
- * VIO in-band descriptor. Used by those vio clients
+ * Vnet in-band descriptor. Used by those vnet clients
  * such as OBP who do not use descriptor rings.
  */
-typedef struct vio_ibnd_desc {
+typedef struct vnet_ibnd_desc {
 	vio_inband_desc_msg_hdr_t	hdr;
 
 	/* payload */
 	uint32_t			nbytes;
 	uint32_t			ncookies;
 	ldc_mem_cookie_t		memcookie[MAX_COOKIES];
-} vio_ibnd_desc_t;
+} vnet_ibnd_desc_t;
 
 #ifdef __cplusplus
 }
--- a/usr/src/uts/sun4v/sys/vsw.h	Wed Dec 20 12:07:14 2006 -0800
+++ b/usr/src/uts/sun4v/sys/vsw.h	Wed Dec 20 14:40:54 2006 -0800
@@ -282,6 +282,7 @@
 	uint8_t		addr_type;	/* Only MAC address at moment */
 	uint8_t		xfer_mode;	/* Dring or Pkt based */
 	uint8_t		ack_freq;	/* Only non zero for Pkt based xfer */
+	krwlock_t	dlistrw;	/* Lock for dring list */
 	dring_info_t	*dringp;	/* List of drings for this lane */
 } lane_t;
 
@@ -305,6 +306,7 @@
 	int			hcnt;		/* # handshake attempts */
 	kmutex_t		status_lock;
 	ldc_status_t		ldc_status;	/* channel status */
+	uint8_t			reset_active;	/* reset flag */
 	uint64_t		local_session;	/* Our session id */
 	uint64_t		peer_session;	/* Our peers session id */
 	uint8_t			session_status;	/* Session recv'd, sent */