changeset 3844:8c345d3f2fd4

6459954 dls_ether_soft_ring_fanout should not always assume traffic is TCP/UDP. 6510853 softring delivers out of order packets
author krgopi
date Fri, 16 Mar 2007 16:11:15 -0700
parents 6501a40c8d2a
children cbc537e53089
files usr/src/uts/common/inet/ip/ip_squeue.c usr/src/uts/common/io/dld/dld_proto.c usr/src/uts/common/io/dls/dls_soft_ring.c usr/src/uts/common/sys/dlpi.h usr/src/uts/common/sys/dls_impl.h usr/src/uts/common/sys/dls_soft_ring.h
diffstat 6 files changed, 135 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip/ip_squeue.c	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/inet/ip/ip_squeue.c	Fri Mar 16 16:11:15 2007 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -583,7 +583,7 @@
 	mutex_exit(&ill->ill_lock);
 
 	ill_soft_ring->ill_dls_change_status(ill_soft_ring->ill_tx_handle,
-	    SOFT_RING_SRC_HASH);
+	    SOFT_RING_FANOUT);
 
 	mutex_enter(&ill->ill_lock);
 	ill->ill_state_flags &= ~ILL_SOFT_RING_ASSIGN;
--- a/usr/src/uts/common/io/dld/dld_proto.c	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/io/dld/dld_proto.c	Fri Mar 16 16:11:15 2007 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1693,7 +1693,7 @@
 		rx = (dsp->ds_mode == DLD_FASTPATH) ?
 			    dld_str_rx_fastpath : dld_str_rx_unitdata;
 	} else {
-		rx = (dls_rx_t)dls_ether_soft_ring_fanout;
+		rx = (dls_rx_t)dls_soft_ring_fanout;
 	}
 	dls_soft_ring_rx_set(dsp->ds_dc, rx, dsp, type);
 }
--- a/usr/src/uts/common/io/dls/dls_soft_ring.c	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/io/dls/dls_soft_ring.c	Fri Mar 16 16:11:15 2007 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -55,9 +55,13 @@
 #include <sys/callb.h>
 #include <sys/sdt.h>
 #include <sys/ddi.h>
+#include <sys/strsun.h>
 #include <sys/strsubr.h>
 #include <inet/common.h>
 #include <inet/ip.h>
+#include <inet/ipsec_impl.h>
+#include <inet/sadb.h>
+#include <inet/ipsecah.h>
 
 #include <sys/dls_impl.h>
 #include <sys/dls_soft_ring.h>
@@ -326,26 +330,20 @@
  * appropriate places.
  */
 /* ARGSUSED */
-void
-soft_ring_process(soft_ring_t *ringp, mblk_t *mp_chain, uint8_t tag)
+static void
+soft_ring_process(soft_ring_t *ringp,
+    mblk_t *mp_chain, mblk_t *tail, uint_t count)
 {
 	void 		*arg1, *arg2;
 	s_ring_proc_t	proc;
-	mblk_t		*tail;
-	int		cnt = 1;
 
 	ASSERT(ringp != NULL);
 	ASSERT(mp_chain != NULL);
 	ASSERT(MUTEX_NOT_HELD(&ringp->s_ring_lock));
 
-	tail = mp_chain;
-	while (tail->b_next != NULL) {
-		tail = tail->b_next;
-		cnt++;
-	}
 	mutex_enter(&ringp->s_ring_lock);
 
-	ringp->s_ring_total_inpkt += cnt;
+	ringp->s_ring_total_inpkt += count;
 	if (!(ringp->s_ring_state & S_RING_PROC) &&
 	    !(ringp->s_ring_type == S_RING_WORKER_ONLY)) {
 		/*
@@ -353,7 +351,7 @@
 		 * first packet, do inline processing else queue the
 		 * packet and do the drain.
 		 */
-		if (ringp->s_ring_first == NULL && cnt == 1) {
+		if (ringp->s_ring_first == NULL && count == 1) {
 			/*
 			 * Fast-path, ok to process and nothing queued.
 			 */
@@ -385,7 +383,7 @@
 				return;
 			}
 		} else {
-			SOFT_RING_ENQUEUE_CHAIN(ringp, mp_chain, tail, cnt);
+			SOFT_RING_ENQUEUE_CHAIN(ringp, mp_chain, tail, count);
 		}
 
 		/*
@@ -409,9 +407,9 @@
 		 */
 		if (ringp->s_ring_count > soft_ring_max_q_cnt) {
 			freemsgchain(mp_chain);
-			DLS_BUMP_STAT(dlss_soft_ring_pkt_drop, cnt);
+			DLS_BUMP_STAT(dlss_soft_ring_pkt_drop, count);
 		} else
-			SOFT_RING_ENQUEUE_CHAIN(ringp, mp_chain, tail, cnt);
+			SOFT_RING_ENQUEUE_CHAIN(ringp, mp_chain, tail, count);
 		if (!(ringp->s_ring_state & S_RING_PROC)) {
 			SOFT_RING_WORKER_WAKEUP(ringp);
 		} else {
@@ -530,7 +528,6 @@
 	dls_impl_t  *dip = (dls_impl_t *)dc;
 
 	rw_enter(&(dip->di_lock), RW_WRITER);
-	dip->di_soft_ring_fanout_type = type;
 	dip->di_rx = rx;
 	if (type == SOFT_RING_NONE)
 		dip->di_rx_arg = arg;
@@ -626,41 +623,157 @@
 	return (B_TRUE);
 }
 
-#define	COMPUTE_HASH(key, sz)	(key % sz)
+/*
+ * Count of packets the fanout code below freed rather than delivered:
+ * msgpullup() of the IP header failed, or the message ended before the
+ * offset that was to be hashed.
+ */
+int dls_bad_ip_pkt = 0;
 
+/*
+ * Advance along the b_cont chain from bp to the mblk containing byte offset
+ * *skip_lenp, subtracting the length of every mblk stepped over so that on
+ * return *skip_lenp is an offset into the returned mblk.  If the message
+ * ends first, the whole message mp is freed, dls_bad_ip_pkt is bumped and
+ * NULL is returned.
+ */
+static mblk_t *
+dls_skip_mblk(mblk_t *bp, mblk_t *mp, int *skip_lenp)
+{
+	while (MBLKL(bp) <= *skip_lenp) {
+		*skip_lenp -= MBLKL(bp);
+		bp = bp->b_cont;
+		if (bp == NULL) {
+			dls_bad_ip_pkt++;
+			freemsg(mp);
+			return (NULL);
+		}
+	}
+	return (bp);
+}
+
+#define	HASH32(x) (((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x))
+#define	COMPUTE_INDEX(key, sz)	((key) % (sz))
+
+/*
+ * dls_soft_ring_fanout():
+ *
+ * Unlink each packet of mp_chain, hash it, and hand runs of consecutive
+ * packets that map to the same soft ring to soft_ring_process() as a single
+ * chain, in arrival order.  TCP, UDP, SCTP and ESP hash on the 32-bit word
+ * after the IP header (ports or SPI); AH headers are stepped over and the
+ * next protocol is examined; anything else hashes on source and destination
+ * address.
+ */
 /* ARGSUSED */
 void
-dls_ether_soft_ring_fanout(void *rx_handle, void *rx_cookie, mblk_t *mp_chain,
+dls_soft_ring_fanout(void *rx_handle, void *rx_cookie, mblk_t *mp_chain,
     mac_header_info_t *mhip)
 {
-	ipha_t		*ipha = (ipha_t *)mp_chain->b_rptr;
+	mblk_t		*mp, *bp, *head, *tail;
+	ipha_t		*ipha;
 	dls_impl_t	*dip = (dls_impl_t *)rx_handle;
-	int		indx;
-	int		key;
-	int		hdr_len;
-	uint16_t	port1, port2;
+	int		indx, saved_indx = 0;
+	int		hash = 0;
+	int		skip_len;
+	uint8_t		protocol;
+	int		count = 0;
+
+	head = tail = NULL;
+
+	while (mp_chain != NULL) {
+		bp = mp = mp_chain;
+		mp_chain = mp_chain->b_next;
+		mp->b_next = NULL;
+		if (MBLKL(mp) < sizeof (ipha_t)) {
+			/*
+			 * msgpullup() returns a new message and leaves the
+			 * original (still referenced by bp) untouched, so the
+			 * original must be freed whether or not it succeeds.
+			 */
+			mp = msgpullup(bp, sizeof (ipha_t));
+			freemsg(bp);
+			if (mp == NULL) {
+				/* Let's toss this away */
+				dls_bad_ip_pkt++;
+				continue;
+			}
+			bp = mp;
+		}
+
+		ipha = (ipha_t *)mp->b_rptr;
+		skip_len = IPH_HDR_LENGTH(ipha);
+		protocol = ipha->ipha_protocol;
+	again:
+		switch (protocol) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_ESP:
+			/*
+			 * Note that for ESP, we fanout on SPI and it is at the
+			 * same offset as the 2x16-bit ports. So it is clumped
+			 * along with TCP, UDP and SCTP.
+			 */
+			if (MBLKL(bp) <= skip_len) {
+				bp = dls_skip_mblk(bp, mp, &skip_len);
+				if (bp == NULL)
+					continue;
+			}
+
+			/*
+			 * NOTE(review): the check above only guarantees one
+			 * byte at skip_len in this mblk, yet four are read
+			 * here; confirm senders never split or truncate the
+			 * ports/SPI word.
+			 */
+			hash = HASH32(*(uint32_t *)(bp->b_rptr + skip_len));
+			break;
 
-	switch (dip->di_soft_ring_fanout_type) {
-	case SOFT_RING_SRC_HASH:
-		/*
-		 * We get a chain of packets from the same remote. Make
-		 * sure the same remote goes to same ring.
-		 */
-		hdr_len = IPH_HDR_LENGTH(ipha);
-		port1 = *((uint16_t *)(&mp_chain->b_rptr[hdr_len]));
-		port2 = *((uint16_t *)(&mp_chain->b_rptr[hdr_len+2]));
-		key = port1 + port2;
-		indx = COMPUTE_HASH(key, dip->di_soft_ring_size);
-		soft_ring_process(dip->di_soft_ring_list[indx],
-		    mp_chain, 0);
-		break;
-	case SOFT_RING_RND_ROBIN:
-	case SOFT_RING_RANDOM:
-		/*
-		 * Just send it to any possible soft ring
-		 */
-		soft_ring_process(dip->di_soft_ring_list[
-			lbolt % dip->di_soft_ring_size], mp_chain, 0);
-		break;
+		case IPPROTO_AH: {
+			ah_t *ah;
+			uint_t ah_length;
+
+			if (MBLKL(bp) <= skip_len) {
+				bp = dls_skip_mblk(bp, mp, &skip_len);
+				if (bp == NULL)
+					continue;
+			}
+
+			ah = (ah_t *)(bp->b_rptr + skip_len);
+			protocol = ah->ah_nexthdr;
+			ah_length = AH_TOTAL_LEN(ah);
+			skip_len += ah_length;
+			goto again;
+		}
+
+		default:
+			/*
+			 * Send the packet to a ring based on src/dest addresses
+			 */
+			hash =
+			    (HASH32(ipha->ipha_src) ^ HASH32(ipha->ipha_dst));
+			break;
+		}
+
+		indx = COMPUTE_INDEX(hash, dip->di_soft_ring_size);
+		if (head == NULL) {
+			saved_indx = indx;
+			head = tail = mp;
+			count++;
+		} else if (indx == saved_indx) {
+			tail->b_next = mp;
+			tail = mp;
+			count++;
+		} else {
+			soft_ring_process(dip->di_soft_ring_list[saved_indx],
+			    head, tail, count);
+			head = tail = mp;
+			saved_indx = indx;
+			count = 1;
+		}
 	}
+	if (head != NULL)
+		soft_ring_process(dip->di_soft_ring_list[saved_indx],
+		    head, tail, count);
 }
--- a/usr/src/uts/common/sys/dlpi.h	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/sys/dlpi.h	Fri Mar 16 16:11:15 2007 -0700
@@ -19,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -749,9 +749,7 @@
 
 /* Soft_Ring fanout types (used by soft_ring_change_status) */
 #define	SOFT_RING_NONE			0x00
-#define	SOFT_RING_RANDOM		0x01
-#define	SOFT_RING_SRC_HASH		0x02
-#define	SOFT_RING_RND_ROBIN		0x03
+#define	SOFT_RING_FANOUT		0x01
 
 #endif /* _KERNEL */
 
--- a/usr/src/uts/common/sys/dls_impl.h	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/sys/dls_impl.h	Fri Mar 16 16:11:15 2007 -0700
@@ -106,7 +106,6 @@
 	uint8_t				di_unicst_addr[MAXMACADDRLEN];
 	soft_ring_t			**di_soft_ring_list;
 	uint_t				di_soft_ring_size;
-	int				di_soft_ring_fanout_type;
 	zoneid_t			di_zid;
 	dls_impl_t			*di_next_impl;
 };
--- a/usr/src/uts/common/sys/dls_soft_ring.h	Fri Mar 16 14:05:13 2007 -0700
+++ b/usr/src/uts/common/sys/dls_soft_ring.h	Fri Mar 16 16:11:15 2007 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -83,11 +83,9 @@
 extern soft_ring_t **soft_ring_set_create(char *, processorid_t, clock_t,
     uint_t, pri_t, int);
 extern void soft_ring_set_destroy(soft_ring_t **, int);
-extern void soft_ring_process(soft_ring_t *, mblk_t *, uint8_t);
 extern void soft_ring_bind(void *, processorid_t);
 extern void soft_ring_unbind(void *);
-extern void dls_ether_soft_ring_fanout(void *, void *, mblk_t *,
-    mac_header_info_t *);
+extern void dls_soft_ring_fanout(void *, void *, mblk_t *, mac_header_info_t *);
 extern boolean_t dls_soft_ring_enable(dls_channel_t, dl_capab_dls_t *);
 extern void dls_soft_ring_disable(dls_channel_t);
 extern boolean_t dls_soft_ring_workers(dls_channel_t);