changeset 10828:562a05fd0a2d

6846343 sctp_icmp_error() drops ICMP_FRAGMENTATION_NEEDED msg from ip, preventing sctp from sending mss
6598652 Potential SCTP receive deadlock with zero window
author George Shepherd <George.Shepherd@Sun.COM>
date Wed, 21 Oct 2009 15:05:54 -0700
parents 8b1a667fca45
children ef1d46805087
files usr/src/uts/common/inet/ip/ip.c usr/src/uts/common/inet/ip/ip6.c usr/src/uts/common/inet/sctp/sctp.c usr/src/uts/common/inet/sctp/sctp_impl.h usr/src/uts/common/inet/sctp/sctp_input.c
diffstat 5 files changed, 53 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ip/ip.c	Wed Oct 21 15:58:21 2009 -0400
+++ b/usr/src/uts/common/inet/ip/ip.c	Wed Oct 21 15:05:54 2009 -0700
@@ -2203,7 +2203,7 @@
 		return (B_FALSE);
 	}
 	/*
-	 * Verify we have atleast ICMP_MIN_TP_HDR_LENGTH bytes of transport
+	 * Verify we have at least ICMP_MIN_TP_HDR_LENGTH bytes of transport
 	 * header.
 	 */
 	if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN >
@@ -2547,13 +2547,13 @@
 
 	case IPPROTO_SCTP:
 		/*
-		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
-		 * transport header.
-		 */
-		if ((uchar_t *)ipha + hdr_length + ICMP_MIN_TP_HDR_LEN >
+		 * Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of
+		 * transport header, in the first mp.
+		 */
+		if ((uchar_t *)ipha + hdr_length + ICMP_MIN_SCTP_HDR_LEN >
 		    mp->b_wptr) {
 			if (!pullupmsg(mp, (uchar_t *)ipha + hdr_length +
-			    ICMP_MIN_TP_HDR_LEN - mp->b_rptr)) {
+			    ICMP_MIN_SCTP_HDR_LEN - mp->b_rptr)) {
 				goto discard_pkt;
 			}
 			icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
--- a/usr/src/uts/common/inet/ip/ip6.c	Wed Oct 21 15:58:21 2009 -0400
+++ b/usr/src/uts/common/inet/ip/ip6.c	Wed Oct 21 15:05:54 2009 -0700
@@ -79,6 +79,7 @@
 #include <inet/tcp.h>
 #include <inet/tcp_impl.h>
 #include <inet/udp_impl.h>
+#include <inet/sctp/sctp_impl.h>
 #include <inet/ipp_common.h>
 
 #include <inet/ip_multi.h>
@@ -860,12 +861,15 @@
 	}
 	case IPPROTO_SCTP:
 		/*
-		 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
-		 * the SCTP header to get the port information.
-		 */
-		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
+		 * Verify we have at least ICMP_MIN_SCTP_HDR_LEN bytes of
+		 * transport header to get the port information.
+		 */
+		if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_SCTP_HDR_LEN >
 		    mp->b_wptr) {
-			break;
+			if (!pullupmsg(mp, (uchar_t *)ip6h + hdr_length +
+			    ICMP_MIN_SCTP_HDR_LEN - mp->b_rptr)) {
+				goto drop_pkt;
+			}
 		}
 
 		up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
--- a/usr/src/uts/common/inet/sctp/sctp.c	Wed Oct 21 15:58:21 2009 -0400
+++ b/usr/src/uts/common/inet/sctp/sctp.c	Wed Oct 21 15:05:54 2009 -0700
@@ -1134,11 +1134,14 @@
 		return;
 	}
 
+	/* account for the ip hdr from the icmp message */
 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
+	/* now the ip hdr of message resulting in this icmp */
 	ipha = (ipha_t *)&icmph[1];
 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
 	sctph = (sctp_hdr_t *)((char *)ipha + iph_hdr_length);
+	/* first_mp must expose the full sctp header. */
 	if ((uchar_t *)(sctph + 1) >= mp->b_wptr) {
 		/* not enough data for SCTP header */
 		freemsg(first_mp);
@@ -1185,7 +1188,15 @@
 			fp->sfa_pmss = (new_mtu - sctp->sctp_hdr_len) &
 			    ~(SCTP_ALIGN - 1);
 			fp->pmtu_discovered = 1;
-
+			/*
+			 * It is possible, even likely, that a fast retransmit
+			 * attempt has been dropped by ip as a result of this
+			 * error; retransmission bundles as much as possible.
+			 * A retransmit here prevents significant delays waiting
+			 * on the timer. This is analogous to the behaviour of
+			 * TCP after ICMP too big.
+			 */
+			sctp_rexmit(sctp, fp);
 			break;
 		case ICMP_PORT_UNREACHABLE:
 		case ICMP_PROTOCOL_UNREACHABLE:
--- a/usr/src/uts/common/inet/sctp/sctp_impl.h	Wed Oct 21 15:58:21 2009 -0400
+++ b/usr/src/uts/common/inet/sctp/sctp_impl.h	Wed Oct 21 15:05:54 2009 -0700
@@ -191,6 +191,7 @@
 #define	SCTP_MAX_COMBINED_HEADER_LENGTH	(60 + 12) /* Maxed out ip + sctp */
 #define	SCTP_MAX_IP_OPTIONS_LENGTH	(60 - IP_SIMPLE_HDR_LENGTH)
 #define	SCTP_MAX_HDR_LENGTH		60
+#define	ICMP_MIN_SCTP_HDR_LEN	(ICMP_MIN_TP_HDR_LEN + sizeof (sctp_hdr_t))
 
 #define	SCTP_SECRET_LEN	16
 
--- a/usr/src/uts/common/inet/sctp/sctp_input.c	Wed Oct 21 15:58:21 2009 -0400
+++ b/usr/src/uts/common/inet/sctp/sctp_input.c	Wed Oct 21 15:05:54 2009 -0700
@@ -1248,8 +1248,14 @@
 
 	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
 
-	/* Check for buffer space */
-	if (sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) {
+	/*
+	 * Check for buffer space. Note if this is the next expected TSN
+	 * we have to take it to avoid deadlock because we cannot deliver
+	 * later queued TSNs and thus clear buffer space without it.
+	 * We drop anything that is purely zero window probe data here.
+	 */
+	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
+	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
 		/* Drop and SACK, but don't advance the cumulative TSN. */
 		sctp->sctp_force_sack = 1;
 		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
@@ -1404,9 +1410,8 @@
 	 */
 	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
 	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
-		dlen += pmp->b_wptr - pmp->b_rptr;
+		dlen += MBLKL(pmp);
 	ASSERT(sctp->sctp_rxqueued >= dlen);
-	ASSERT(sctp->sctp_rwnd >= dlen);
 
 	/* Deliver the message. */
 	sctp->sctp_rxqueued -= dlen;
@@ -1424,9 +1429,15 @@
 			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
 			new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
 			    msgdsize(dmp), 0, &error, NULL);
-			if (new_rwnd > sctp->sctp_rwnd) {
+			/*
+			 * Since we always deliver the next TSN data chunk,
+			 * we may buffer a little more than allowed. In
+			 * that case, just mark the window as 0.
+			 */
+			if (new_rwnd < 0)
+				sctp->sctp_rwnd = 0;
+			else if (new_rwnd > sctp->sctp_rwnd)
 				sctp->sctp_rwnd = new_rwnd;
-			}
 			SCTP_ACK_IT(sctp, tsn);
 		} else {
 			/* Just free the message if we don't have memory. */
@@ -1488,10 +1499,9 @@
 		 */
 		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
 		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
-			dlen += pmp->b_wptr - pmp->b_rptr;
+			dlen += MBLKL(pmp);
 
 		ASSERT(sctp->sctp_rxqueued >= dlen);
-		ASSERT(sctp->sctp_rwnd >= dlen);
 
 		sctp->sctp_rxqueued -= dlen;
 		if (can_deliver) {
@@ -1508,9 +1518,10 @@
 				    0 : SCTP_PARTIAL_DATA;
 				new_rwnd = sctp->sctp_ulp_recv(sctp->sctp_ulpd,
 				    dmp, msgdsize(dmp), 0, &error, NULL);
-				if (new_rwnd > sctp->sctp_rwnd) {
+				if (new_rwnd < 0)
+					sctp->sctp_rwnd = 0;
+				else if (new_rwnd > sctp->sctp_rwnd)
 					sctp->sctp_rwnd = new_rwnd;
-				}
 				SCTP_ACK_IT(sctp, tsn);
 			} else {
 				freemsg(dmp);
@@ -2150,7 +2161,7 @@
 			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
 			for (pmp = dmp->b_cont; pmp != NULL;
 			    pmp = pmp->b_cont) {
-				dlen += pmp->b_wptr - pmp->b_rptr;
+				dlen += MBLKL(pmp);
 			}
 			if (can_deliver) {
 				int32_t	nrwnd;
@@ -2172,7 +2183,9 @@
 					nrwnd = sctp->sctp_ulp_recv(
 					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
 					    0, &error, NULL);
-					if (nrwnd > sctp->sctp_rwnd)
+					if (nrwnd < 0)
+						sctp->sctp_rwnd = 0;
+					else if (nrwnd > sctp->sctp_rwnd)
 						sctp->sctp_rwnd = nrwnd;
 				} else {
 					/*