changeset 4061:0f223b243748

6454375 e1000g link flaps at initialization, triggering failovers 6472255 e1000g can't restore to 1000M with ndd setting 6496763 e1000g should free packets when link is down 6501294 "eeprom checksum failed" with pci8086,108c device 6504688 e1000g.conf settings are inconsistent with ndd output 6505445 e1000g : when all advertised capabilities are set to 0, ndd puts all of them 1 6519690 e1000g should not print the link up/down messages to console 6531474 Fatal PCIe Fabric Error panics on T2000 when using jumbo frames on e1000g interfaces 6535712 e1000g: the processing of the checksum flags should be protected by tx_lock
author xy150489
date Wed, 18 Apr 2007 20:32:49 -0700
parents 82b3f4545f58
children c84209043eff
files usr/src/uts/common/io/e1000g/README usr/src/uts/common/io/e1000g/e1000g_debug.h usr/src/uts/common/io/e1000g/e1000g_main.c usr/src/uts/common/io/e1000g/e1000g_ndd.c usr/src/uts/common/io/e1000g/e1000g_stat.c usr/src/uts/common/io/e1000g/e1000g_sw.h usr/src/uts/common/io/e1000g/e1000g_tx.c
diffstat 7 files changed, 360 insertions(+), 235 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/e1000g/README	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/README	Wed Apr 18 20:32:49 2007 -0700
@@ -485,3 +485,16 @@
    6502458 e1000g is open source, move the source from usr/closed to use/src
    6505360 e1000g Makefile should not include "-N drv/dld" in the LDFLAGS
 
+5.1.7
+======
+  This version has the following fixes:
+   6454375 e1000g link flaps at initialization, triggering failovers
+   6472255 e1000g can't restore to 1000M with ndd setting
+   6496763 e1000g should free packets when link is down
+   6501294 "eeprom checksum failed" with pci8086,108c device
+   6504688 e1000g.conf settings are inconsistent with ndd output
+   6505445 e1000g : when all advertised capabilities are set to 0, ndd puts all of them 1
+   6519690 e1000g should not print the link up/down messages to console
+   6531474 Fatal PCIe Fabric Error panics on T2000 when using jumbo frames on e1000g interfaces
+   6535712 e1000g: the processing of the checksum flags should be protected by tx_lock
+
--- a/usr/src/uts/common/io/e1000g/e1000g_debug.h	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_debug.h	Wed Apr 18 20:32:49 2007 -0700
@@ -81,8 +81,6 @@
 #ifdef e1000g_DEBUG
 
 static int e1000g_debug = DEFAULTDEBUGLEVEL;
-static int e1000g_display_only = DEFAULTDISPLAYONLY;
-static int e1000g_print_only = DEFAULTPRINTONLY;
 static int e1000g_debug_hw = 1;
 
 #define	e1000g_ERRS_LEVEL	0x001	/* (1)	Errors */
@@ -122,10 +120,6 @@
 #else
 
 static int e1000g_debug = 0;
-static int e1000g_display_only = 1;	/* 1 - Yes Display, */
-					/* 0 - Don't Display */
-static int e1000g_print_only = 1;	/* 1 - Yes Print to Msg Log, */
-					/* 0 - Don't Print to Msg Log */
 static int e1000g_debug_hw = 0;
 
 #define	e1000g_DEBUGLOG_0(Adapter, Level, fmt)
@@ -144,6 +138,9 @@
 void e1000g_log(struct e1000g *Adapter, int level, char *fmt, ...);
 void e1000g_log_hw(char *msg, void *cptr, int length);
 
+static int e1000g_display_only = DEFAULTDISPLAYONLY;
+static int e1000g_print_only = DEFAULTPRINTONLY;
+
 #ifdef __cplusplus
 }
 #endif
--- a/usr/src/uts/common/io/e1000g/e1000g_main.c	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_main.c	Wed Apr 18 20:32:49 2007 -0700
@@ -53,9 +53,9 @@
 #define	E1000_RX_INTPT_TIME	128
 #define	E1000_RX_PKT_CNT	8
 
-static char ident[] = "Intel PRO/1000 Ethernet 5.1.6";
+static char ident[] = "Intel PRO/1000 Ethernet 5.1.7";
 static char e1000g_string[] = "Intel(R) PRO/1000 Network Connection";
-static char e1000g_version[] = "Driver Ver. 5.1.6";
+static char e1000g_version[] = "Driver Ver. 5.1.7";
 
 /*
  * Proto types for DDI entry points
@@ -75,7 +75,6 @@
 static int e1000g_init(struct e1000g *);
 static int e1000g_start(struct e1000g *);
 static void e1000g_stop(struct e1000g *);
-static boolean_t e1000g_reset(struct e1000g *);
 static int e1000g_m_start(void *);
 static void e1000g_m_stop(void *);
 static int e1000g_m_promisc(void *, boolean_t);
@@ -101,8 +100,10 @@
 /*
  * Local routines
  */
+static void e1000g_tx_drop(struct e1000g *Adapter);
+static void e1000g_link_timer(void *);
 static void e1000g_LocalTimer(void *);
-static boolean_t e1000g_LocalTimerWork(struct e1000g *);
+static boolean_t e1000g_link_check(struct e1000g *);
 static boolean_t e1000g_stall_check(struct e1000g *);
 static void e1000g_smartspeed(struct e1000g *);
 static void e1000g_getparam(struct e1000g *Adapter);
@@ -560,12 +561,6 @@
 
 	cmn_err(CE_CONT, "!%s, %s\n", e1000g_string, e1000g_version);
 
-	/*
-	 * Tell the world about the link state of e1000g
-	 */
-	mac_link_update(Adapter->mh,
-	    (Adapter->LinkIsActive) ? LINK_STATE_UP : LINK_STATE_DOWN);
-
 	return (DDI_SUCCESS);
 
 attach_fail:
@@ -686,6 +681,8 @@
 	/* Get conf file properties */
 	e1000g_getparam(Adapter);
 
+	hw->forced_speed_duplex = e1000_100_full;
+	hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
 	e1000g_force_speed_duplex(Adapter);
 
 	e1000g_get_max_frame_size(Adapter);
@@ -779,6 +776,8 @@
 		hw->master_slave = e1000_ms_hw_default;	/* E1000_MASTER_SLAVE */
 	}
 
+	Adapter->link_state = LINK_STATE_UNKNOWN;
+
 	return (DDI_SUCCESS);
 }
 
@@ -1006,10 +1005,10 @@
 static int
 e1000g_init(struct e1000g *Adapter)
 {
-	UINT16 LineSpeed, Duplex;
 	uint32_t pba;
 	uint32_t ctrl;
 	struct e1000_hw *hw;
+	clock_t link_timeout;
 
 	hw = &Adapter->Shared;
 
@@ -1027,8 +1026,17 @@
 	(void) e1000_init_eeprom_params(hw);
 
 	if (e1000_validate_eeprom_checksum(hw) < 0) {
-		e1000g_log(Adapter, CE_WARN, "Eeprom checksum failed");
-		goto init_fail;
+		/*
+		 * Some PCI-E parts fail the first check due to
+		 * the link being in sleep state.  Call it again,
+		 * if it fails a second time, it's a real issue.
+		 */
+		if (e1000_validate_eeprom_checksum(hw) < 0) {
+			e1000g_log(Adapter, CE_WARN,
+			    "Invalid EEPROM checksum. Please contact "
+			    "the vendor to update the EEPROM.");
+			goto init_fail;
+		}
 	}
 
 #ifdef __sparc
@@ -1172,19 +1180,21 @@
 	/* Setup Interrupt Throttling Register */
 	E1000_WRITE_REG(hw, ITR, Adapter->intr_throttling_rate);
 
-	/*
-	 * Check for link status
-	 */
-	if (e1000g_link_up(Adapter)) {
-		e1000_get_speed_and_duplex(hw, &LineSpeed, &Duplex);
-		Adapter->link_speed = LineSpeed;
-		Adapter->link_duplex = Duplex;
-		Adapter->LinkIsActive = B_TRUE;
+	/* Start the timer for link setup */
+	if (hw->autoneg)
+		link_timeout = PHY_AUTO_NEG_TIME * drv_usectohz(100000);
+	else
+		link_timeout = PHY_FORCE_TIME * drv_usectohz(100000);
+
+	mutex_enter(&Adapter->e1000g_linklock);
+	if (hw->wait_autoneg_complete) {
+		Adapter->link_complete = B_TRUE;
 	} else {
-		Adapter->link_speed = 0;
-		Adapter->link_duplex = 0;
-		Adapter->LinkIsActive = B_FALSE;
+		Adapter->link_complete = B_FALSE;
+		Adapter->link_tid = timeout(e1000g_link_timer,
+		    (void *)Adapter, link_timeout);
 	}
+	mutex_exit(&Adapter->e1000g_linklock);
 
 	/* Enable PCI-Ex master */
 	if (hw->bus_type == e1000_bus_type_pci_express) {
@@ -1359,12 +1369,6 @@
 static int
 e1000g_start(struct e1000g *Adapter)
 {
-	/*
-	 * We set Adapter->PseudoLinkChanged here, so that e1000g_LocalTimer
-	 * will tell the upper network modules about the link state of e1000g
-	 */
-	Adapter->PseudoLinkChanged = B_TRUE;
-
 	if (!(Adapter->attach_progress & ATTACH_PROGRESS_INIT)) {
 		if (e1000g_init(Adapter) != DDI_SUCCESS) {
 			e1000g_log(Adapter, CE_WARN,
@@ -1400,12 +1404,9 @@
 static void
 e1000g_stop(struct e1000g *Adapter)
 {
-	PTX_SW_PACKET packet;
 	timeout_id_t tid;
 	e1000g_tx_ring_t *tx_ring;
-	e1000g_msg_chain_t *msg_chain;
-	mblk_t *mp;
-	mblk_t *nmp;
+	boolean_t link_changed;
 
 	tx_ring = Adapter->tx_ring;
 
@@ -1423,17 +1424,25 @@
 	/* Disable timers */
 	disable_timeout(Adapter);
 
+	/* Disable the tx timer for 82547 chipset */
 	mutex_enter(&tx_ring->tx_lock);
-
 	tx_ring->timer_enable_82547 = B_FALSE;
 	tid = tx_ring->timer_id_82547;
 	tx_ring->timer_id_82547 = 0;
-
 	mutex_exit(&tx_ring->tx_lock);
 
 	if (tid != 0)
 		(void) untimeout(tid);
 
+	/* Disable the link timer */
+	mutex_enter(&Adapter->e1000g_linklock);
+	tid = Adapter->link_tid;
+	Adapter->link_tid = 0;
+	mutex_exit(&Adapter->e1000g_linklock);
+
+	if (tid != 0)
+		(void) untimeout(tid);
+
 	/* Stop the chip and release pending resources */
 	rw_enter(&Adapter->chip_lock, RW_WRITER);
 
@@ -1442,6 +1451,31 @@
 	e1000_reset_hw(&Adapter->Shared);
 
 	/* Release resources still held by the TX descriptors */
+	e1000g_tx_drop(Adapter);
+
+	/* Clean the pending rx jumbo packet fragment */
+	if (Adapter->rx_mblk != NULL) {
+		freemsg(Adapter->rx_mblk);
+		Adapter->rx_mblk = NULL;
+		Adapter->rx_mblk_tail = NULL;
+		Adapter->rx_packet_len = 0;
+	}
+
+	rw_exit(&Adapter->chip_lock);
+}
+
+static void
+e1000g_tx_drop(struct e1000g *Adapter)
+{
+	e1000g_tx_ring_t *tx_ring;
+	e1000g_msg_chain_t *msg_chain;
+	PTX_SW_PACKET packet;
+	mblk_t *mp;
+	mblk_t *nmp;
+	uint32_t packet_count;
+
+	tx_ring = Adapter->tx_ring;
+
 	/*
 	 * Here we don't need to protect the lists using
 	 * the usedlist_lock and freelist_lock, for they
@@ -1449,6 +1483,7 @@
 	 */
 	mp = NULL;
 	nmp = NULL;
+	packet_count = 0;
 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&tx_ring->used_list);
 	while (packet != NULL) {
 		if (packet->mp != NULL) {
@@ -1465,6 +1500,7 @@
 		}
 
 		FreeTxSwPacket(packet);
+		packet_count++;
 
 		packet = (PTX_SW_PACKET)
 		    QUEUE_GET_NEXT(&tx_ring->used_list, &packet->Link);
@@ -1483,20 +1519,20 @@
 		mutex_exit(&msg_chain->lock);
 	}
 
-	QUEUE_APPEND(&tx_ring->free_list, &tx_ring->used_list);
-	QUEUE_INIT_LIST(&tx_ring->used_list);
-
-	/* Clean the pending rx jumbo packet fragment */
-	if (Adapter->rx_mblk != NULL) {
-		freemsg(Adapter->rx_mblk);
-		Adapter->rx_mblk = NULL;
-		Adapter->rx_mblk_tail = NULL;
-		Adapter->rx_packet_len = 0;
+	ddi_intr_trigger_softint(Adapter->tx_softint_handle, NULL);
+
+	if (packet_count > 0) {
+		QUEUE_APPEND(&tx_ring->free_list, &tx_ring->used_list);
+		QUEUE_INIT_LIST(&tx_ring->used_list);
+
+		/* Setup TX descriptor pointers */
+		tx_ring->tbd_next = tx_ring->tbd_first;
+		tx_ring->tbd_oldest = tx_ring->tbd_first;
+
+		/* Setup our HW Tx Head & Tail descriptor pointers */
+		E1000_WRITE_REG(&Adapter->Shared, TDH, 0);
+		E1000_WRITE_REG(&Adapter->Shared, TDT, 0);
 	}
-
-	rw_exit(&Adapter->chip_lock);
-
-	(void) e1000g_tx_freemsg((caddr_t)Adapter, NULL);
 }
 
 static boolean_t
@@ -1535,7 +1571,7 @@
 	return (done);
 }
 
-static boolean_t
+boolean_t
 e1000g_reset(struct e1000g *Adapter)
 {
 	e1000g_stop(Adapter);
@@ -1650,8 +1686,9 @@
  * Return Value:							*
  *									*
  * Functions called:							*
- *	ProcessRxInterruptArray						*
- *	e1000g_LocalTimerWork						*
+ *	e1000g_receive							*
+ *	e1000g_link_check						*
+ *	e1000g_recycle							*
  *									*
  * **********************************************************************
  */
@@ -1690,7 +1727,8 @@
 	if ((ICRContents & E1000_ICR_RXSEQ) ||
 	    (ICRContents & E1000_ICR_LSC) ||
 	    (ICRContents & E1000_ICR_GPI_EN1)) {
-		boolean_t linkstate_changed;
+		boolean_t link_changed;
+		timeout_id_t tid = 0;
 
 		/*
 		 * Encountered RX Sequence Error!!! Link maybe forced and
@@ -1703,14 +1741,29 @@
 		stop_timeout(Adapter);
 
 		mutex_enter(&Adapter->e1000g_linklock);
-		/* e1000g_LocalTimerWork takes care of link status change */
-		linkstate_changed = e1000g_LocalTimerWork(Adapter);
+		/* e1000g_link_check takes care of link status change */
+		link_changed = e1000g_link_check(Adapter);
+		/*
+		 * If the link timer has not timed out, we'll not notify
+		 * the upper layer with any link state until the link
+		 * is up.
+		 */
+		if (link_changed && !Adapter->link_complete) {
+			if (Adapter->link_state == LINK_STATE_UP) {
+				Adapter->link_complete = B_TRUE;
+				tid = Adapter->link_tid;
+				Adapter->link_tid = 0;
+			} else {
+				link_changed = B_FALSE;
+			}
+		}
 		mutex_exit(&Adapter->e1000g_linklock);
 
-		if (linkstate_changed) {
-			mac_link_update(Adapter->mh,
-			    (Adapter->LinkIsActive) ?
-				LINK_STATE_UP : LINK_STATE_DOWN);
+		if (link_changed) {
+			if (tid != 0)
+				(void) untimeout(tid);
+
+			mac_link_update(Adapter->mh, Adapter->link_state);
 		}
 
 		start_timeout(Adapter);
@@ -2425,12 +2478,11 @@
 }
 
 static boolean_t
-e1000g_LocalTimerWork(struct e1000g *Adapter)
+e1000g_link_check(struct e1000g *Adapter)
 {
-	UINT16 LineSpeed, Duplex, phydata;
-	boolean_t linkstate_changed = B_FALSE;
+	uint16_t speed, duplex, phydata;
+	boolean_t link_changed = B_FALSE;
 	struct e1000_hw *hw;
-	e1000g_ether_addr_t ether_addr;
 	uint32_t reg_tarc;
 
 	hw = &Adapter->Shared;
@@ -2439,51 +2491,45 @@
 		/*
 		 * The Link is up, check whether it was marked as down earlier
 		 */
-		if (!Adapter->LinkIsActive) {
-			e1000_get_speed_and_duplex(hw, &LineSpeed, &Duplex);
-			Adapter->link_speed = LineSpeed;
-			Adapter->link_duplex = Duplex;
-
-			if (!Adapter->PseudoLinkChanged) {
-				if ((hw->mac_type == e1000_82571) ||
-				    (hw->mac_type == e1000_82572)) {
-					reg_tarc = E1000_READ_REG(hw, TARC0);
-					if (LineSpeed == SPEED_1000)
-						reg_tarc |= (1 << 21);
-					else
-						reg_tarc &= ~(1 << 21);
-					E1000_WRITE_REG(hw, TARC0, reg_tarc);
-				}
-
-				e1000g_log(Adapter, CE_NOTE,
-				    "Adapter %dMbps %s %s link is up.",
-				    LineSpeed,
-				    ((Duplex == FULL_DUPLEX) ?
-					"full duplex" : "half duplex"),
-				    ((hw->media_type ==
-					e1000_media_type_copper) ?
-					"copper" : "fiber"));
+		if (Adapter->link_state != LINK_STATE_UP) {
+			e1000_get_speed_and_duplex(hw, &speed, &duplex);
+			Adapter->link_speed = speed;
+			Adapter->link_duplex = duplex;
+			Adapter->link_state = LINK_STATE_UP;
+			link_changed = B_TRUE;
+
+			Adapter->tx_link_down_timeout = 0;
+
+			if ((hw->mac_type == e1000_82571) ||
+			    (hw->mac_type == e1000_82572)) {
+				reg_tarc = E1000_READ_REG(hw, TARC0);
+				if (speed == SPEED_1000)
+					reg_tarc |= (1 << 21);
+				else
+					reg_tarc &= ~(1 << 21);
+				E1000_WRITE_REG(hw, TARC0, reg_tarc);
 			}
 
-			Adapter->LinkIsActive = B_TRUE;
-			linkstate_changed = B_TRUE;
+			e1000g_log(Adapter, CE_NOTE,
+			    "Adapter %dMbps %s %s link is up.", speed,
+			    ((duplex == FULL_DUPLEX) ?
+				"full duplex" : "half duplex"),
+			    ((hw->media_type == e1000_media_type_copper) ?
+				"copper" : "fiber"));
 		}
 		Adapter->smartspeed = 0;
 	} else {
-		if (Adapter->LinkIsActive) {
+		if (Adapter->link_state != LINK_STATE_DOWN) {
 			Adapter->link_speed = 0;
 			Adapter->link_duplex = 0;
-
-			if (!Adapter->PseudoLinkChanged) {
-				e1000g_log(Adapter, CE_NOTE,
-				    "Adapter %s link is down.",
-				    ((hw->media_type ==
-					e1000_media_type_copper) ?
-					"copper" : "fiber"));
-			}
-
-			Adapter->LinkIsActive = B_FALSE;
-			linkstate_changed = B_TRUE;
+			Adapter->link_state = LINK_STATE_DOWN;
+			link_changed = B_TRUE;
+
+			e1000g_log(Adapter, CE_NOTE,
+			    "Adapter %s link is down.",
+			    ((hw->media_type == e1000_media_type_copper) ?
+				"copper" : "fiber"));
+
 			/*
 			 * SmartSpeed workaround for Tabor/TanaX, When the
 			 * driver loses link disable auto master/slave
@@ -2506,8 +2552,54 @@
 		} else {
 			e1000g_smartspeed(Adapter);
 		}
+
+		if (Adapter->started) {
+			if (Adapter->tx_link_down_timeout <
+			    MAX_TX_LINK_DOWN_TIMEOUT) {
+				Adapter->tx_link_down_timeout++;
+			} else if (Adapter->tx_link_down_timeout ==
+			    MAX_TX_LINK_DOWN_TIMEOUT) {
+				rw_enter(&Adapter->chip_lock, RW_WRITER);
+				e1000g_tx_drop(Adapter);
+				rw_exit(&Adapter->chip_lock);
+				Adapter->tx_link_down_timeout++;
+			}
+		}
 	}
 
+	return (link_changed);
+}
+
+static void
+e1000g_LocalTimer(void *ws)
+{
+	struct e1000g *Adapter = (struct e1000g *)ws;
+	struct e1000_hw *hw;
+	e1000g_ether_addr_t ether_addr;
+	boolean_t link_changed;
+
+	hw = &Adapter->Shared;
+
+	(void) e1000g_tx_freemsg((caddr_t)Adapter, NULL);
+
+	if (e1000g_stall_check(Adapter)) {
+		e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
+		    "Tx stall detected. Activate automatic recovery.\n");
+		Adapter->StallWatchdog = 0;
+		Adapter->tx_recycle_fail = 0;
+		Adapter->reset_count++;
+		(void) e1000g_reset(Adapter);
+	}
+
+	link_changed = B_FALSE;
+	mutex_enter(&Adapter->e1000g_linklock);
+	if (Adapter->link_complete)
+		link_changed = e1000g_link_check(Adapter);
+	mutex_exit(&Adapter->e1000g_linklock);
+
+	if (link_changed)
+		mac_link_update(Adapter->mh, Adapter->link_state);
+
 	/*
 	 * With 82571 controllers, any locally administered address will
 	 * be overwritten when there is a reset on the other port.
@@ -2541,7 +2633,8 @@
 	 * These properties should only be set for 10/100
 	 */
 	if ((hw->media_type == e1000_media_type_copper) &&
-	    (Adapter->link_speed != SPEED_1000)) {
+	    ((Adapter->link_speed == SPEED_100) ||
+	    (Adapter->link_speed == SPEED_10))) {
 		e1000_update_adaptive(hw);
 	}
 	/*
@@ -2549,38 +2642,26 @@
 	 */
 	E1000_WRITE_REG(hw, ICS, E1000_IMS_RXT0);
 
-	return (linkstate_changed);
+	restart_timeout(Adapter);
 }
 
+/*
+ * The function e1000g_link_timer() is called when the timer for link setup
+ * expires, which indicates the completion of the link setup. The link
+ * state will not be updated until the link setup is completed. And the
+ * link state will not be sent to the upper layer through mac_link_update()
+ * in this function. It will be updated in the local timer routine or the
+ * interrupt service routine after the interface is started (plumbed).
+ */
 static void
-e1000g_LocalTimer(void *ws)
+e1000g_link_timer(void *arg)
 {
-	struct e1000g *Adapter = (struct e1000g *)ws;
-	boolean_t linkstate_changed;
-
-	(void) e1000g_tx_freemsg((caddr_t)Adapter, NULL);
-
-	if (e1000g_stall_check(Adapter)) {
-		e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
-		    "Tx stall detected. Activate automatic recovery.\n");
-		Adapter->StallWatchdog = 0;
-		Adapter->tx_recycle_fail = 0;
-		Adapter->reset_count++;
-		(void) e1000g_reset(Adapter);
-	}
+	struct e1000g *Adapter = (struct e1000g *)arg;
 
 	mutex_enter(&Adapter->e1000g_linklock);
-	linkstate_changed = e1000g_LocalTimerWork(Adapter);
+	Adapter->link_complete = B_TRUE;
+	Adapter->link_tid = 0;
 	mutex_exit(&Adapter->e1000g_linklock);
-
-	if (linkstate_changed || Adapter->PseudoLinkChanged) {
-		mac_link_update(Adapter->mh,
-		    (Adapter->LinkIsActive) ?
-			LINK_STATE_UP : LINK_STATE_DOWN);
-		Adapter->PseudoLinkChanged = B_FALSE;
-	}
-
-	restart_timeout(Adapter);
 }
 
 /*
@@ -3033,7 +3114,7 @@
 static boolean_t
 e1000g_stall_check(struct e1000g *Adapter)
 {
-	if (!Adapter->LinkIsActive)
+	if (Adapter->link_state != LINK_STATE_UP)
 		return (B_FALSE);
 
 	if (Adapter->tx_recycle_fail > 0)
@@ -3360,7 +3441,6 @@
 		if (iocp->ioc_count != sizeof (uint32_t))
 			return (IOC_INVAL);
 
-		Adapter->PseudoLinkChanged = B_TRUE;
 		lbmp = (uint32_t *)mp->b_cont->b_rptr;
 		if (!e1000g_set_loopback_mode(Adapter, *lbmp))
 			return (IOC_INVAL);
--- a/usr/src/uts/common/io/e1000g/e1000g_ndd.c	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_ndd.c	Wed Apr 18 20:32:49 2007 -0700
@@ -100,7 +100,8 @@
 { PARAM_LP_10HDX_CAP,	    0, 1, 0,	NULL,	"-lp_10hdx_cap"		},
 
 /* Force Speed and Duplex */
-{ PARAM_FORCE_SPEED_DUPLEX, 1, 4, 4,	NULL,	"?force_speed_duplex"	},
+{ PARAM_FORCE_SPEED_DUPLEX, GDIAG_10_HALF, GDIAG_100_FULL, GDIAG_100_FULL,
+					NULL,	"?force_speed_duplex"	},
 
 /* Current operating modes */
 { PARAM_LINK_STATUS,	    0, 1, 0,	NULL,	"-link_status"		},
@@ -444,10 +445,24 @@
 
 	/* Force Speed and Duplex Parameter */
 	case PARAM_FORCE_SPEED_DUPLEX:
+		switch (Adapter->Shared.forced_speed_duplex) {
+		case e1000_10_half:
+			ndp->ndp_val = GDIAG_10_HALF;
+			break;
+		case e1000_10_full:
+			ndp->ndp_val = GDIAG_10_FULL;
+			break;
+		case e1000_100_half:
+			ndp->ndp_val = GDIAG_100_HALF;
+			break;
+		case e1000_100_full:
+			ndp->ndp_val = GDIAG_100_FULL;
+			break;
+		}
 		break;
 	/* Link States */
 	case PARAM_LINK_STATUS:
-		ndp->ndp_val = Adapter->LinkIsActive;
+		ndp->ndp_val = (Adapter->link_state == LINK_STATE_UP) ? 1 : 0;
 		break;
 	case PARAM_LINK_SPEED:
 		ndp->ndp_val = Adapter->link_speed;
@@ -512,6 +527,7 @@
 	struct e1000g *Adapter;
 	uint16_t autoneg_advertised;
 	uint8_t forced_speed_duplex;
+	boolean_t autoneg_enable;
 	boolean_t link_change;
 
 	Adapter = ndp->ndp_instance;
@@ -519,6 +535,7 @@
 
 	autoneg_advertised = 0;
 	forced_speed_duplex = 0;
+	autoneg_enable = B_FALSE;
 	link_change = B_FALSE;
 
 	rw_enter(&Adapter->chip_lock, RW_WRITER);
@@ -599,16 +616,16 @@
 	}
 
 	switch (Adapter->param_force_speed_duplex) {
-	case 1:
+	case GDIAG_10_HALF:
 		forced_speed_duplex = e1000_10_half;
 		break;
-	case 2:
+	case GDIAG_10_FULL:
 		forced_speed_duplex = e1000_10_full;
 		break;
-	case 3:
+	case GDIAG_100_HALF:
 		forced_speed_duplex = e1000_100_half;
 		break;
-	case 4:
+	case GDIAG_100_FULL:
 		forced_speed_duplex = e1000_100_full;
 		break;
 	default:
@@ -620,6 +637,7 @@
 	/* Auto-Negotiation Advertisement Capabilities */
 	case PARAM_ADV_AUTONEG_CAP:
 		if (value != ndp->ndp_val) {
+			autoneg_enable = (value == 1) ? B_TRUE : B_FALSE;
 			link_change = B_TRUE;
 		}
 		break;
@@ -631,6 +649,7 @@
 				    "adv_autoneg_cap enabled");
 				goto finished;
 			}
+			autoneg_enable = B_TRUE;
 			link_change = B_TRUE;
 			if (value == 1) {
 				autoneg_advertised |= ADVERTISE_1000_FULL;
@@ -647,6 +666,7 @@
 				    "adv_autoneg_cap enabled");
 				goto finished;
 			}
+			autoneg_enable = B_TRUE;
 			link_change = B_TRUE;
 			if (value == 1) {
 				autoneg_advertised |= ADVERTISE_100_FULL;
@@ -663,6 +683,7 @@
 				    "adv_autoneg_cap enabled");
 				goto finished;
 			}
+			autoneg_enable = B_TRUE;
 			link_change = B_TRUE;
 			if (value == 1) {
 				autoneg_advertised |= ADVERTISE_100_HALF;
@@ -679,6 +700,7 @@
 				    "adv_autoneg_cap enabled");
 				goto finished;
 			}
+			autoneg_enable = B_TRUE;
 			link_change = B_TRUE;
 			if (value == 1) {
 				autoneg_advertised |= ADVERTISE_10_FULL;
@@ -695,6 +717,7 @@
 				    "adv_autoneg_cap enabled");
 				goto finished;
 			}
+			autoneg_enable = B_TRUE;
 			link_change = B_TRUE;
 			if (value == 1) {
 				autoneg_advertised |= ADVERTISE_10_HALF;
@@ -711,18 +734,19 @@
 				    "adv_autoneg_cap disabled");
 				goto finished;
 			}
+			autoneg_enable = B_FALSE;
 			link_change = B_TRUE;
 			switch (value) {
-			case 1:
+			case GDIAG_10_HALF:
 				forced_speed_duplex = e1000_10_half;
 				break;
-			case 2:
+			case GDIAG_10_FULL:
 				forced_speed_duplex = e1000_10_full;
 				break;
-			case 3:
+			case GDIAG_100_HALF:
 				forced_speed_duplex = e1000_100_half;
 				break;
-			case 4:
+			case GDIAG_100_FULL:
 				forced_speed_duplex = e1000_100_full;
 				break;
 			default:
@@ -736,9 +760,14 @@
 	}
 
 	if (link_change) {
-		ndp->ndp_val = value;
+		if (autoneg_enable) {
+			if (autoneg_advertised == 0) {
+				e1000g_log(Adapter, CE_WARN,
+				    "ndd set: there must be at least one "
+				    "advertised capability enabled");
+				goto finished;
+			}
 
-		if (Adapter->param_adv_autoneg == 1) {
 			Adapter->Shared.autoneg = B_TRUE;
 			Adapter->Shared.autoneg_advertised =
 				autoneg_advertised;
@@ -748,11 +777,11 @@
 				forced_speed_duplex;
 		}
 
-		if (e1000_setup_link(&Adapter->Shared) != E1000_SUCCESS) {
-			e1000g_log(Adapter, CE_WARN,
-			    "ndd set: update link failed");
-			goto finished;
-		}
+		ndp->ndp_val = value;
+
+		rw_exit(&Adapter->chip_lock);
+		(void) e1000g_reset(Adapter);
+		return;
 	}
 
 finished:
--- a/usr/src/uts/common/io/e1000g/e1000g_stat.c	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_stat.c	Wed Apr 18 20:32:49 2007 -0700
@@ -202,7 +202,6 @@
 	e1000g_ksp = (e1000gstat *)ksp->ks_data;
 	ASSERT(e1000g_ksp != NULL);
 
-	e1000g_ksp->link_up.value.ul = Adapter->LinkIsActive;
 	e1000g_ksp->link_speed.value.ul = Adapter->link_speed;
 	e1000g_ksp->rx_none.value.ul = Adapter->rx_none;
 	e1000g_ksp->rx_error.value.ul = Adapter->rx_error;
@@ -870,9 +869,6 @@
 	/*
 	 * Initialize all the statistics
 	 */
-	kstat_named_init(&e1000g_ksp->link_up, "link_up",
-	    KSTAT_DATA_ULONG);
-
 	kstat_named_init(&e1000g_ksp->link_speed, "link_speed",
 	    KSTAT_DATA_ULONG);
 
--- a/usr/src/uts/common/io/e1000g/e1000g_sw.h	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_sw.h	Wed Apr 18 20:32:49 2007 -0700
@@ -203,14 +203,12 @@
 #define	E1000G_RX_SW_SENDUP		0x1
 #define	E1000G_RX_SW_DETACHED		0x2
 
-#ifdef e1000g_DEBUG
+/*
+ * By default it will print only to log
+ */
 #define	DEFAULTDEBUGLEVEL		0x004
-#define	DEFAULTDISPLAYONLY		1
+#define	DEFAULTDISPLAYONLY		0
 #define	DEFAULTPRINTONLY		1
-/*
- * By default it will do both i.e. print as well as log
- */
-#endif
 
 /*
  * definitions for smartspeed workaround
@@ -288,6 +286,8 @@
 /* Defines for Tx stall check */
 #define	E1000G_STALL_WATCHDOG_COUNT	8
 
+#define	MAX_TX_LINK_DOWN_TIMEOUT	8
+
 /* Defines for DVMA */
 #ifdef __sparc
 #define	E1000G_DEFAULT_DVMA_PAGE_NUM	2
@@ -673,6 +673,13 @@
 	kmutex_t lock;
 } e1000g_msg_chain_t;
 
+typedef struct _cksum_data {
+	uint32_t ether_header_size;
+	uint32_t cksum_flags;
+	uint32_t cksum_start;
+	uint32_t cksum_stuff;
+} cksum_data_t;
+
 /*
  * MultiCast Command Block (MULTICAST_CB) The multicast
  * structure contains an array of multicast addresses and
@@ -698,7 +705,6 @@
 
 typedef struct _e1000gstat {
 
-	kstat_named_t link_up;		/* Link Status */
 	kstat_named_t link_speed;	/* Link Speed */
 	kstat_named_t rx_none;		/* Rx No Incoming Data */
 	kstat_named_t rx_error;		/* Rx Error in Packet */
@@ -825,10 +831,7 @@
 	/*
 	 * TCP/UDP checksum offload
 	 */
-	uint_t cksum_start;
-	uint_t cksum_stuff;
-	uint_t cksum_flags;
-	uint8_t ether_header_size;
+	cksum_data_t cksum_data;
 	/*
 	 * Timer definitions for 82547
 	 */
@@ -874,10 +877,9 @@
 	struct e1000_hw Shared;
 	struct e1000g_osdep osdep;
 
-	UINT LinkIsActive;
+	link_state_t link_state;
 	UINT link_speed;
 	UINT link_duplex;
-	timeout_id_t WatchDogTimer_id;
 	UINT NumRxDescriptors;
 	UINT NumRxFreeList;
 	UINT NumTxDescriptors;
@@ -898,6 +900,9 @@
 	size_t RxBufferSize;
 	boolean_t intr_adaptive;
 	uint32_t intr_throttling_rate;
+	timeout_id_t WatchDogTimer_id;
+	timeout_id_t link_tid;
+	boolean_t link_complete;
 
 	/*
 	 * The e1000g_timeout_lock must be held when updateing the
@@ -906,9 +911,9 @@
 	 */
 	kmutex_t e1000g_timeout_lock;
 	/*
-	 * link notification order ??? I think it protects the
-	 * link field in struct e1000g (such as LinkIsActive,
-	 * FullDuplex etc) and struct e1000_hw.
+	 * The e1000g_linklock protects the link fields in struct e1000g,
+	 * such as link_state, link_speed, link_duplex, link_complete, and
+	 * link_tid.
 	 */
 	kmutex_t e1000g_linklock;
 	kmutex_t TbiCntrMutex;
@@ -928,6 +933,7 @@
 	uint32_t tx_recycle_low_water;
 	uint32_t tx_recycle_num;
 	uint32_t tx_frags_limit;
+	uint32_t tx_link_down_timeout;
 
 	boolean_t tx_intr_enable;
 	ddi_softint_handle_t tx_softint_handle;
@@ -1006,8 +1012,6 @@
 	 */
 	boolean_t resched_needed;
 
-	boolean_t PseudoLinkChanged;
-
 #ifdef __sparc
 	ulong_t sys_page_sz;
 	uint_t dvma_page_num;
@@ -1084,6 +1088,7 @@
 void SetupTransmitStructures(struct e1000g *Adapter);
 void SetupReceiveStructures(struct e1000g *Adapter);
 void SetupMulticastTable(struct e1000g *Adapter);
+boolean_t e1000g_reset(struct e1000g *Adapter);
 
 int e1000g_recycle(e1000g_tx_ring_t *tx_ring);
 void FreeTxSwPacket(PTX_SW_PACKET packet);
--- a/usr/src/uts/common/io/e1000g/e1000g_tx.c	Wed Apr 18 18:48:41 2007 -0700
+++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c	Wed Apr 18 20:32:49 2007 -0700
@@ -58,9 +58,10 @@
 static boolean_t e1000g_send(struct e1000g *, mblk_t *);
 static int e1000g_tx_copy(struct e1000g *, PTX_SW_PACKET, mblk_t *, uint32_t);
 static int e1000g_tx_bind(struct e1000g *, PTX_SW_PACKET, mblk_t *);
+static boolean_t check_cksum_context(e1000g_tx_ring_t *, cksum_data_t *);
 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
-    uint_t, boolean_t);
-static void e1000g_fill_context_descriptor(e1000g_tx_ring_t *,
+    cksum_data_t *);
+static void e1000g_fill_context_descriptor(cksum_data_t *,
     struct e1000_context_desc *);
 static int e1000g_fill_tx_desc(struct e1000g *,
     PTX_SW_PACKET, uint64_t, size_t);
@@ -78,6 +79,7 @@
 #ifndef e1000g_DEBUG
 #pragma inline(e1000g_tx_copy)
 #pragma inline(e1000g_tx_bind)
+#pragma inline(check_cksum_context)
 #pragma inline(e1000g_fill_tx_ring)
 #pragma inline(e1000g_fill_context_descriptor)
 #pragma inline(e1000g_fill_tx_desc)
@@ -182,7 +184,7 @@
 
 	rw_enter(&Adapter->chip_lock, RW_READER);
 
-	if (!Adapter->started) {
+	if (!Adapter->started || (Adapter->link_state != LINK_STATE_UP)) {
 		freemsgchain(mp);
 		mp = NULL;
 	}
@@ -238,12 +240,7 @@
 	mblk_t *nmp;
 	mblk_t *tmp;
 	e1000g_tx_ring_t *tx_ring;
-	/* IP Head/TCP/UDP checksum offload */
-	uint_t cksum_start;
-	uint_t cksum_stuff;
-	uint_t cksum_flags;
-	boolean_t cksum_load;
-	uint8_t ether_header_size;
+	cksum_data_t cksum;
 
 	/* Get the total size and frags number of the message */
 	force_bcopy = 0;
@@ -303,7 +300,8 @@
 	 * If there are many frags of the message, then bcopy them
 	 * into one tx descriptor buffer will get better performance.
 	 */
-	if (frag_count >= Adapter->tx_frags_limit) {
+	if ((frag_count >= Adapter->tx_frags_limit) &&
+	    (msg_size <= Adapter->TxBufferSize)) {
 		Adapter->tx_exceed_frags++;
 		force_bcopy |= FORCE_BCOPY_EXCEED_FRAGS;
 	}
@@ -323,30 +321,14 @@
 	QUEUE_INIT_LIST(&pending_list);
 
 	/* Retrieve checksum info */
-	hcksum_retrieve(mp, NULL, NULL, &cksum_start, &cksum_stuff,
-	    NULL, NULL, &cksum_flags);
-
-	cksum_load = B_FALSE;
-	if (cksum_flags) {
-		if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
-		    htons(ETHERTYPE_VLAN))
-			ether_header_size = sizeof (struct ether_vlan_header);
-		else
-			ether_header_size = sizeof (struct ether_header);
+	hcksum_retrieve(mp, NULL, NULL, &cksum.cksum_start, &cksum.cksum_stuff,
+	    NULL, NULL, &cksum.cksum_flags);
 
-		if ((ether_header_size != tx_ring->ether_header_size) ||
-		    (cksum_flags != tx_ring->cksum_flags) ||
-		    (cksum_stuff != tx_ring->cksum_stuff) ||
-		    (cksum_start != tx_ring->cksum_start)) {
-
-			tx_ring->ether_header_size = ether_header_size;
-			tx_ring->cksum_flags = cksum_flags;
-			tx_ring->cksum_start = cksum_start;
-			tx_ring->cksum_stuff = cksum_stuff;
-
-			cksum_load = B_TRUE;
-		}
-	}
+	if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
+	    htons(ETHERTYPE_VLAN))
+		cksum.ether_header_size = sizeof (struct ether_vlan_header);
+	else
+		cksum.ether_header_size = sizeof (struct ether_header);
 
 	/* Process each mblk fragment and fill tx descriptors */
 	packet = NULL;
@@ -440,8 +422,7 @@
 		goto tx_send_failed;
 	}
 
-	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list,
-	    cksum_flags, cksum_load);
+	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cksum);
 
 	mutex_exit(&tx_ring->tx_lock);
 
@@ -500,14 +481,36 @@
 	return (B_FALSE);
 }
 
+static boolean_t
+check_cksum_context(e1000g_tx_ring_t *tx_ring, cksum_data_t *cksum)
+{
+	boolean_t cksum_load;
+	cksum_data_t *last;
+
+	cksum_load = B_FALSE;
+	last = &tx_ring->cksum_data;
+
+	if (cksum->cksum_flags != 0) {
+		if ((cksum->ether_header_size != last->ether_header_size) ||
+		    (cksum->cksum_flags != last->cksum_flags) ||
+		    (cksum->cksum_stuff != last->cksum_stuff) ||
+		    (cksum->cksum_start != last->cksum_start)) {
+
+			cksum_load = B_TRUE;
+		}
+	}
+
+	return (cksum_load);
+}
+
 static int
 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
-    uint_t cksum_flags, boolean_t cksum_load)
+    cksum_data_t *cksum)
 {
 	struct e1000g *Adapter;
 	PTX_SW_PACKET first_packet;
 	PTX_SW_PACKET packet;
-	struct e1000_context_desc *cksum_desc;
+	boolean_t cksum_load;
 	struct e1000_tx_desc *first_data_desc;
 	struct e1000_tx_desc *next_desc;
 	struct e1000_tx_desc *descriptor;
@@ -519,22 +522,22 @@
 	Adapter = tx_ring->adapter;
 
 	desc_count = 0;
-	cksum_desc = NULL;
+	first_packet = NULL;
 	first_data_desc = NULL;
 	descriptor = NULL;
 
-	first_packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
-	ASSERT(first_packet);
-
 	next_desc = tx_ring->tbd_next;
 
 	/* IP Head/TCP/UDP checksum offload */
+	cksum_load = check_cksum_context(tx_ring, cksum);
+
 	if (cksum_load) {
+		first_packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
+
 		descriptor = next_desc;
 
-		cksum_desc = (struct e1000_context_desc *)descriptor;
-
-		e1000g_fill_context_descriptor(tx_ring, cksum_desc);
+		e1000g_fill_context_descriptor(cksum,
+		    (struct e1000_context_desc *)descriptor);
 
 		/* Check the wrap-around case */
 		if (descriptor == tx_ring->tbd_last)
@@ -545,9 +548,6 @@
 		desc_count++;
 	}
 
-	if (cksum_desc == NULL)
-		first_packet = NULL;
-
 	first_data_desc = next_desc;
 
 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
@@ -601,11 +601,11 @@
 
 	ASSERT(descriptor);
 
-	if (cksum_flags) {
-		if (cksum_flags & HCK_IPV4_HDRCKSUM)
+	if (cksum->cksum_flags) {
+		if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM)
 			((struct e1000_data_desc *)first_data_desc)->
 				upper.fields.popts |= E1000_TXD_POPTS_IXSM;
-		if (cksum_flags & HCK_PARTIALCKSUM)
+		if (cksum->cksum_flags & HCK_PARTIALCKSUM)
 			((struct e1000_data_desc *)first_data_desc)->
 				upper.fields.popts |= E1000_TXD_POPTS_TXSM;
 	}
@@ -662,6 +662,10 @@
 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
 	mutex_exit(&tx_ring->usedlist_lock);
 
+	/* Store the cksum data */
+	if (cksum_load)
+		tx_ring->cksum_data = *cksum;
+
 	return (desc_count);
 }
 
@@ -823,9 +827,10 @@
 	}
 
 	/* For TCP/UDP checksum offload */
-	tx_ring->cksum_stuff = 0;
-	tx_ring->cksum_start = 0;
-	tx_ring->cksum_flags = 0;
+	tx_ring->cksum_data.cksum_stuff = 0;
+	tx_ring->cksum_data.cksum_start = 0;
+	tx_ring->cksum_data.cksum_flags = 0;
+	tx_ring->cksum_data.ether_header_size = 0;
 
 	/* Initialize tx parameters */
 	Adapter->tx_bcopy_thresh = DEFAULTTXBCOPYTHRESHOLD;
@@ -1323,22 +1328,22 @@
 }
 
 static void
-e1000g_fill_context_descriptor(e1000g_tx_ring_t *tx_ring,
+e1000g_fill_context_descriptor(cksum_data_t *cksum,
     struct e1000_context_desc *cksum_desc)
 {
-	if (tx_ring->cksum_flags & HCK_IPV4_HDRCKSUM) {
+	if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM) {
 		cksum_desc->lower_setup.ip_fields.ipcss =
-		    tx_ring->ether_header_size;
+		    cksum->ether_header_size;
 		cksum_desc->lower_setup.ip_fields.ipcso =
-		    tx_ring->ether_header_size +
+		    cksum->ether_header_size +
 		    offsetof(struct ip, ip_sum);
 		cksum_desc->lower_setup.ip_fields.ipcse =
-		    tx_ring->ether_header_size +
+		    cksum->ether_header_size +
 		    sizeof (struct ip) - 1;
 	} else
 		cksum_desc->lower_setup.ip_config = 0;
 
-	if (tx_ring->cksum_flags & HCK_PARTIALCKSUM) {
+	if (cksum->cksum_flags & HCK_PARTIALCKSUM) {
 		/*
 		 * The packet with same protocol has the following
 		 * stuff and start offset:
@@ -1350,9 +1355,9 @@
 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
 		 */
 		cksum_desc->upper_setup.tcp_fields.tucss =
-		    tx_ring->cksum_start + tx_ring->ether_header_size;
+		    cksum->cksum_start + cksum->ether_header_size;
 		cksum_desc->upper_setup.tcp_fields.tucso =
-		    tx_ring->cksum_stuff + tx_ring->ether_header_size;
+		    cksum->cksum_stuff + cksum->ether_header_size;
 		cksum_desc->upper_setup.tcp_fields.tucse = 0;
 	} else
 		cksum_desc->upper_setup.tcp_config = 0;