changeset 13870:387db3e6d543

3304 need workaround for QEMU bug that induces bad e1000g checksums Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com> Reviewed by: Robert Mustacchi <rm@joyent.com> Reviewed by: Richard Lowe <richlowe@richlowe.net> Reviewed by: Garrett D'Amore <garrett@damore.org> Approved by: Eric Schrock <Eric.Schrock@delphix.com>
author Bryan Cantrill <bryan@joyent.com>
date Wed, 31 Oct 2012 05:51:14 -0700
parents 921a99998bb4
children a9c12c2c1647
files usr/src/uts/common/io/e1000g/e1000g_tx.c
diffstat 1 files changed, 35 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/io/e1000g/e1000g_tx.c	Mon Oct 29 12:08:09 2012 -0500
+++ b/usr/src/uts/common/io/e1000g/e1000g_tx.c	Wed Oct 31 05:51:14 2012 -0700
@@ -24,6 +24,10 @@
  */
 
 /*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
  * **********************************************************************
  *									*
  * Module Name:								*
@@ -610,6 +614,7 @@
 	struct e1000_tx_desc *first_data_desc;
 	struct e1000_tx_desc *next_desc;
 	struct e1000_tx_desc *descriptor;
+	struct e1000_data_desc zeroed;
 	int desc_count;
 	boolean_t buff_overrun_flag;
 	int i;
@@ -624,6 +629,7 @@
 	first_packet = NULL;
 	packet = NULL;
 	buff_overrun_flag = B_FALSE;
+	zeroed.upper.data = 0;
 
 	next_desc = tx_ring->tbd_next;
 
@@ -649,6 +655,32 @@
 
 	first_data_desc = next_desc;
 
+	/*
+	 * According to the documentation, the packet options field (POPTS) is
+	 * "ignored except on the first data descriptor of a packet."  However,
+	 * there is a bug in QEMU (638955) whereby the POPTS field within a
+	 * given data descriptor is used to interpret that data descriptor --
+	 * regardless of whether the descriptor is the first in a packet or
+	 * not.  For a packet that spans multiple descriptors, the (virtual)
+	 * HW checksum (either TCP/UDP or IP or both) will therefore _not_ be
+	 * performed on descriptors after the first, resulting in incorrect
+	 * checksums and mysteriously dropped/retransmitted packets.  Other
+	 * drivers do not have this issue because they (harmlessly) set the
+	 * POPTS field on every data descriptor to be the intended options for
+	 * the entire packet.  To circumvent this QEMU bug, we engage in this
+	 * same behavior iff the subsystem vendor and device IDs indicate that
+	 * this is an emulated QEMU device (1af4,1100).
+	 */
+	if (hw->subsystem_vendor_id == 0x1af4 &&
+	    hw->subsystem_device_id == 0x1100 &&
+	    cur_context->cksum_flags) {
+		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
+			zeroed.upper.fields.popts |= E1000_TXD_POPTS_IXSM;
+
+		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
+			zeroed.upper.fields.popts |= E1000_TXD_POPTS_TXSM;
+	}
+
 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
 	while (packet) {
 		ASSERT(packet->num_desc);
@@ -663,7 +695,7 @@
 			    packet->desc[i].length;
 
 			/* Zero out status */
-			descriptor->upper.data = 0;
+			descriptor->upper.data = zeroed.upper.data;
 
 			descriptor->lower.data |=
 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
@@ -708,7 +740,7 @@
 				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
 
 				/* Zero out status */
-				next_desc->upper.data = 0;
+				next_desc->upper.data = zeroed.upper.data;
 
 				next_desc->lower.data |=
 				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
@@ -771,7 +803,7 @@
 		next_desc->lower.data = 4;
 
 		/* Zero out status */
-		next_desc->upper.data = 0;
+		next_desc->upper.data = zeroed.upper.data;
 		/* It must be part of a LSO packet */
 		next_desc->lower.data |=
 		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |