changeset 9992:ad70ef992a0e

6846545 tcp urg data handling is broken for fused streams based endpoints
author Anders Persson <Anders.Persson@Sun.COM>
date Mon, 29 Jun 2009 13:59:57 -0700
parents 5944be7176ec
children 24a332cb9e29
files usr/src/uts/common/inet/tcp/tcp.c usr/src/uts/common/inet/tcp/tcp_fusion.c
diffstat 2 files changed, 71 insertions(+), 72 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/tcp/tcp.c	Mon Jun 29 13:59:56 2009 -0700
+++ b/usr/src/uts/common/inet/tcp/tcp.c	Mon Jun 29 13:59:57 2009 -0700
@@ -21700,8 +21700,7 @@
 	if ((mp->b_wptr - rptr) >= sizeof (t_scalar_t)) {
 		type = ((union T_primitives *)rptr)->type;
 		if (type == T_EXDATA_REQ) {
-			tcp_output_urgent(connp, mp->b_cont, arg2);
-			freeb(mp);
+			tcp_output_urgent(connp, mp, arg2);
 		} else if (type != T_DATA_REQ) {
 			goto non_urgent_data;
 		} else {
@@ -26721,9 +26720,8 @@
 	}
 
 	/*
-	 * Try to force urgent data out on the wire.
-	 * Even if we have unsent data this will
-	 * at least send the urgent flag.
+	 * Try to force urgent data out on the wire. Even if we have unsent
+	 * data this will at least send the urgent flag.
 	 * XXX does not handle more flag correctly.
 	 */
 	len += tcp->tcp_unsent;
@@ -26734,6 +26732,14 @@
 	/* Bypass tcp protocol for fused tcp loopback */
 	if (tcp->tcp_fused && tcp_fuse_output(tcp, mp, msize))
 		return;
+
+	/* Strip off the T_EXDATA_REQ if the data is from TPI */
+	if (DB_TYPE(mp) != M_DATA) {
+		mblk_t *mp1 = mp;
+		ASSERT(!IPCL_IS_NONSTR(connp));
+		mp = mp->b_cont;
+		freeb(mp1);
+	}
 	tcp_wput_data(tcp, mp, B_TRUE);
 }
 
@@ -26920,7 +26926,6 @@
 	int			error;
 	mblk_t			*stropt_mp;
 	mblk_t			*ordrel_mp;
-	mblk_t			*fused_sigurp_mp;
 
 	tcp = connp->conn_tcp;
 
@@ -26935,9 +26940,6 @@
 	((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND;
 	ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind);
 
-	/* Pre-allocate the M_PCSIG used by fusion */
-	fused_sigurp_mp = allocb_wait(1, BPRI_HI, STR_NOSIG, NULL);
-
 	/*
 	 * Enter the squeue so that no new packets can come in
 	 */
@@ -26946,7 +26948,6 @@
 		/* failed to enter, free all the pre-allocated messages. */
 		freeb(stropt_mp);
 		freeb(ordrel_mp);
-		freeb(fused_sigurp_mp);
 		/*
 		 * We cannot process the eager, so at least send out a
 		 * RST so the peer can reconnect.
@@ -26959,19 +26960,19 @@
 	}
 
 	/*
+	 * Both endpoints must be of the same type (either STREAMS or
+	 * non-STREAMS) for fusion to be enabled. So if we are fused,
+	 * we have to unfuse.
+	 */
+	if (tcp->tcp_fused)
+		tcp_unfuse(tcp);
+
+	/*
 	 * No longer a direct socket
 	 */
 	connp->conn_flags &= ~IPCL_NONSTR;
-
 	tcp->tcp_ordrel_mp = ordrel_mp;
 
-	if (tcp->tcp_fused) {
-		ASSERT(tcp->tcp_fused_sigurg_mp == NULL);
-		tcp->tcp_fused_sigurg_mp = fused_sigurp_mp;
-	} else {
-		freeb(fused_sigurp_mp);
-	}
-
 	if (tcp->tcp_listener != NULL) {
 		/* The eager will deal with opts when accept() is called */
 		freeb(stropt_mp);
--- a/usr/src/uts/common/inet/tcp/tcp_fusion.c	Mon Jun 29 13:59:56 2009 -0700
+++ b/usr/src/uts/common/inet/tcp/tcp_fusion.c	Mon Jun 29 13:59:57 2009 -0700
@@ -226,15 +226,17 @@
 
 	/*
 	 * We can only proceed if peer exists, resides in the same squeue
-	 * as our conn and is not raw-socket.  The squeue assignment of
-	 * this eager tcp was done earlier at the time of SYN processing
-	 * in ip_fanout_tcp{_v6}.  Note that similar squeues by itself
-	 * doesn't guarantee a safe condition to fuse, hence we perform
+	 * as our conn and is not raw-socket. We also restrict fusion to
+	 * endpoints of the same type (STREAMS or non-STREAMS). The squeue
+	 * assignment of this eager tcp was done earlier at the time of SYN
+	 * processing in ip_fanout_tcp{_v6}.  Note that similar squeues by
+	 * itself doesn't guarantee a safe condition to fuse, hence we perform
 	 * additional tests below.
 	 */
 	ASSERT(peer_connp == NULL || peer_connp != connp);
 	if (peer_connp == NULL || peer_connp->conn_sqp != connp->conn_sqp ||
-	    !IPCL_IS_TCP(peer_connp)) {
+	    !IPCL_IS_TCP(peer_connp) ||
+	    IPCL_IS_NONSTR(connp) != IPCL_IS_NONSTR(peer_connp)) {
 		if (peer_connp != NULL) {
 			TCP_STAT(tcps, tcp_fusion_unqualified);
 			CONN_DEC_REF(peer_connp);
@@ -289,23 +291,19 @@
 		 * endpoints which will only be used during/after unfuse.
 		 */
 		if (!IPCL_IS_NONSTR(tcp->tcp_connp)) {
-			if ((mp = allocb(1, BPRI_HI)) == NULL)
-				goto failed;
+			ASSERT(!IPCL_IS_NONSTR(peer_tcp->tcp_connp));
 
-			tcp->tcp_fused_sigurg_mp = mp;
-		}
-
-		if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp)) {
 			if ((mp = allocb(1, BPRI_HI)) == NULL)
 				goto failed;
-
-			peer_tcp->tcp_fused_sigurg_mp = mp;
-		}
+			tcp->tcp_fused_sigurg_mp = mp;
 
-		if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
-		    (mp = allocb(sizeof (struct stroptions),
-		    BPRI_HI)) == NULL) {
-			goto failed;
+			if ((mp = allocb(1, BPRI_HI)) == NULL)
+				goto failed;
+			peer_tcp->tcp_fused_sigurg_mp = mp;
+
+			if ((mp = allocb(sizeof (struct stroptions),
+			    BPRI_HI)) == NULL)
+				goto failed;
 		}
 
 		/* Fuse both endpoints */
@@ -458,8 +456,9 @@
 }
 
 /*
- * Fusion output routine for urgent data.  This routine is called by
- * tcp_fuse_output() for handling non-M_DATA mblks.
+ * Fusion output routine used to handle urgent data sent by STREAMS based
+ * endpoints. This routine is called by tcp_fuse_output() for handling
+ * non-M_DATA mblks.
  */
 void
 tcp_fuse_output_urg(tcp_t *tcp, mblk_t *mp)
@@ -471,6 +470,7 @@
 	tcp_stack_t	*tcps = tcp->tcp_tcps;
 
 	ASSERT(tcp->tcp_fused);
+	ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
 	ASSERT(peer_tcp != NULL && peer_tcp->tcp_loopback_peer == tcp);
 	ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO);
 	ASSERT(mp->b_cont != NULL && DB_TYPE(mp->b_cont) == M_DATA);
@@ -924,7 +924,8 @@
 
 	DTRACE_PROBE2(tcp__fuse__output, tcp_t *, tcp, uint_t, send_size);
 
-	if (!TCP_IS_DETACHED(peer_tcp)) {
+	if (!IPCL_IS_NONSTR(peer_tcp->tcp_connp) &&
+	    !TCP_IS_DETACHED(peer_tcp)) {
 		/*
 		 * Drain the peer's receive queue it has urgent data or if
 		 * we're not flow-controlled.  There is no need for draining
@@ -932,8 +933,7 @@
 		 * will pull the data via tcp_fuse_rrw().
 		 */
 		if (urgent || (!flow_stopped && !peer_tcp->tcp_direct_sockfs)) {
-			ASSERT(IPCL_IS_NONSTR(peer_tcp->tcp_connp) ||
-			    peer_tcp->tcp_rcv_list != NULL);
+			ASSERT(peer_tcp->tcp_rcv_list != NULL);
 			/*
 			 * For TLI-based streams, a thread in tcp_accept_swap()
 			 * can race with us.  That thread will ensure that the
@@ -995,41 +995,39 @@
 	 * works properly.
 	 */
 	if (tcp->tcp_fused_sigurg) {
+		ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
+
 		tcp->tcp_fused_sigurg = B_FALSE;
-		if (IPCL_IS_NONSTR(connp)) {
-			(*connp->conn_upcalls->su_signal_oob)
-			    (connp->conn_upper_handle, 0);
-		} else {
+		/*
+		 * sigurg_mpp is normally NULL, i.e. when we're still
+		 * fused and didn't get here because of tcp_unfuse().
+		 * In this case try hard to allocate the M_PCSIG mblk.
+		 */
+		if (sigurg_mpp == NULL &&
+		    (mp = allocb(1, BPRI_HI)) == NULL &&
+		    (mp = allocb_tryhard(1)) == NULL) {
+			/* Alloc failed; try again next time */
+			tcp->tcp_push_tid = TCP_TIMER(tcp,
+			    tcp_push_timer,
+			    MSEC_TO_TICK(
+			    tcps->tcps_push_timer_interval));
+			return (B_TRUE);
+		} else if (sigurg_mpp != NULL) {
 			/*
-			 * sigurg_mpp is normally NULL, i.e. when we're still
-			 * fused and didn't get here because of tcp_unfuse().
-			 * In this case try hard to allocate the M_PCSIG mblk.
+			 * Use the supplied M_PCSIG mblk; it means we're
+			 * either unfused or in the process of unfusing,
+			 * and the drain must happen now.
 			 */
-			if (sigurg_mpp == NULL &&
-			    (mp = allocb(1, BPRI_HI)) == NULL &&
-			    (mp = allocb_tryhard(1)) == NULL) {
-				/* Alloc failed; try again next time */
-				tcp->tcp_push_tid = TCP_TIMER(tcp,
-				    tcp_push_timer,
-				    MSEC_TO_TICK(
-				    tcps->tcps_push_timer_interval));
-				return (B_TRUE);
-			} else if (sigurg_mpp != NULL) {
-				/*
-				 * Use the supplied M_PCSIG mblk; it means we're
-				 * either unfused or in the process of unfusing,
-				 * and the drain must happen now.
-				 */
-				mp = *sigurg_mpp;
-				*sigurg_mpp = NULL;
-			}
-			ASSERT(mp != NULL);
+			mp = *sigurg_mpp;
+			*sigurg_mpp = NULL;
+		}
+		ASSERT(mp != NULL);
 
-			/* Send up the signal */
-			DB_TYPE(mp) = M_PCSIG;
-			*mp->b_wptr++ = (uchar_t)SIGURG;
-			putnext(q, mp);
-		}
+		/* Send up the signal */
+		DB_TYPE(mp) = M_PCSIG;
+		*mp->b_wptr++ = (uchar_t)SIGURG;
+		putnext(q, mp);
+
 		/*
 		 * Let the regular tcp_rcv_drain() path handle
 		 * draining the data if we're no longer fused.