changeset 2933:b83c1115488b

6440263 M_PROTO msg associated with OOB Byte eats up one Byte of TCP receive window
author ss146032
date Tue, 17 Oct 2006 00:27:18 -0700
parents 9882da59a45c
children 5745c107c2e3
files usr/src/uts/common/fs/sockfs/sockstr.c
diffstat 1 files changed, 56 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/fs/sockfs/sockstr.c	Mon Oct 16 17:04:12 2006 -0700
+++ b/usr/src/uts/common/fs/sockfs/sockstr.c	Tue Oct 17 00:27:18 2006 -0700
@@ -1975,6 +1975,10 @@
 
 	case T_EXDATA_IND: {
 		mblk_t		*mctl, *mdata;
+		mblk_t *lbp;
+		union T_primitives *tprp;
+		struct stdata   *stp;
+		queue_t *qp;
 
 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
 			zcmn_err(getzoneid(), CE_WARN,
@@ -2019,6 +2023,58 @@
 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
 
+		stp = vp->v_stream;
+		ASSERT(stp != NULL);
+		qp = _RD(stp->sd_wrq);
+
+		mutex_enter(QLOCK(qp));
+		lbp = qp->q_last;
+
+		/*
+		 * We want to avoid queueing up a string of T_EXDATA_IND
+		 * messages with no intervening data messages at the stream
+		 * head. These messages contribute to the total message
+		 * count. Eventually this can lead to STREAMS flow control
+		 * and also cause TCP to advertise a zero window condition
+		 * to the peer. This can happen in the degenerate case where
+		 * the sender and receiver exchange only OOB data. The sender
+		 * only sends messages with MSG_OOB flag and the receiver
+		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
+		 * An example of this scenario has been reported in applications
+		 * that use OOB data to exchange heart beats. Flow control
+		 * relief will never happen if the application only reads OOB
+		 * data which is done directly by sorecvoob() and the
+		 * T_EXDATA_IND messages at the streamhead won't be consumed.
+		 * Note that there is no correctness issue in compressing the
+		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
+		 * message. A single read that does not specify MSG_OOB will
+		 * read across all the marks in a loop in sotpi_recvmsg().
+		 * Each mark is individually distinguishable only if the
+		 * T_EXDATA_IND messages are separated by data messages.
+		 */
+		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
+			tprp = (union T_primitives *)lbp->b_rptr;
+			if ((tprp->type == T_EXDATA_IND) &&
+			    !(so->so_options & SO_OOBINLINE)) {
+
+				/*
+				 * free the new M_PROTO message
+				 */
+				freemsg(mctl);
+
+				/*
+				 * adjust the OOB count and OOB signal count
+				 * just incremented for the new OOB data.
+				 */
+				so->so_oobcnt--;
+				so->so_oobsigcnt--;
+				mutex_exit(QLOCK(qp));
+				mutex_exit(&so->so_lock);
+				return (NULL);
+			}
+		}
+		mutex_exit(QLOCK(qp));
+
 		/*
 		 * Pass the T_EXDATA_IND and the M_DATA back separately
 		 * by using b_next linkage. (The stream head will queue any