changeset 9888:3d27daea2cbf

6845913 fr_make_icmp_*() uses TH_SYN/TH_FIN for testing fin_flx - it's not the intention
6827271 ipfilter TCP state emulation ends up in 5/0 state (Established/Closed)
6562745 Adapt a better TCP statemachine emulation (fr_tcp_age()) from upstream version
author Alexandr Nedvedicky <Alexandr.Nedvedicky@Sun.COM>
date Wed, 17 Jun 2009 10:42:08 +0200
parents 6b176a50433b
children 68d0fe4c716e
files usr/src/uts/common/inet/ipf/ip_fil_solaris.c usr/src/uts/common/inet/ipf/ip_state.c usr/src/uts/common/inet/ipf/netinet/ip_compat.h
diffstat 3 files changed, 115 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c	Wed Jun 17 12:00:49 2009 +0800
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c	Wed Jun 17 10:42:08 2009 +0200
@@ -2277,6 +2277,7 @@
 fr_info_t *fin;
 {
 	struct in_addr tmp_src;
+	tcphdr_t *tcp;
 	struct icmp *icmp;
 	mblk_t *mblk_icmp;
 	mblk_t *mblk_ip;
@@ -2294,8 +2295,10 @@
 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
 	 */
+	tcp = (tcphdr_t *) fin->fin_dp;
+
 	if ((fin->fin_p == IPPROTO_TCP) && 
-	    !(fin->fin_flx & (TH_SYN | TH_FIN)))
+	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
 		return (-1);
 
 	/*
@@ -2467,6 +2470,7 @@
 fr_info_t *fin;
 {
 	struct icmp6_hdr *icmp6;
+	tcphdr_t *tcp;
 	struct in6_addr	tmp_src6;
 	size_t icmp_pld_len;
 	mblk_t *mblk_ip, *mblk_icmp;
@@ -2478,8 +2482,10 @@
 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
 	 */
-	if (fin->fin_p == IPPROTO_TCP &&
-	    !(fin->fin_flx & (TH_SYN | TH_FIN)))
+	tcp = (tcphdr_t *) fin->fin_dp;
+
+	if ((fin->fin_p == IPPROTO_TCP) && 
+	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
 		return (-1);
 
 	/*
--- a/usr/src/uts/common/inet/ipf/ip_state.c	Wed Jun 17 12:00:49 2009 +0800
+++ b/usr/src/uts/common/inet/ipf/ip_state.c	Wed Jun 17 10:42:08 2009 +0200
@@ -1660,10 +1660,10 @@
 	 * entry to be created with a retransmited SYN packet.
 	 */
 	if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
-		if (((is->is_state[source] > IPF_TCPS_ESTABLISHED) ||
-		    (is->is_state[source] == IPF_TCPS_CLOSED)) &&
-		    ((is->is_state[!source] > IPF_TCPS_ESTABLISHED) ||
-		    (is->is_state[!source] == IPF_TCPS_CLOSED))) {
+		if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) &&
+		    (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) {
+			is->is_state[source] = IPF_TCPS_CLOSED;
+			is->is_state[!source] = IPF_TCPS_CLOSED;
 			/*
 			 * Do not update is->is_sti.tqe_die in case state entry
 			 * is already present in deletetq. It prevents state
@@ -3532,19 +3532,36 @@
 	tcpflags = tcp->th_flags;
 	dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
 
+	ostate = tqe->tqe_state[1 - dir];
+	nstate = tqe->tqe_state[dir];
+
+	DTRACE_PROBE4(
+		indata,
+		fr_info_t *, fin,
+		int, ostate,
+		int, nstate,
+		u_char, tcpflags
+	);
+
 	if (tcpflags & TH_RST) {
 		if (!(tcpflags & TH_PUSH) && !dlen)
 			nstate = IPF_TCPS_CLOSED;
 		else
 			nstate = IPF_TCPS_CLOSE_WAIT;
+
+		/*
+		 * Once RST is received, we must advance peer's state to
+		 * CLOSE_WAIT.
+		 */
+		if (ostate <= IPF_TCPS_ESTABLISHED) {
+			tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
+		}
 		rval = 1;
 	} else {
-		ostate = tqe->tqe_state[1 - dir];
-		nstate = tqe->tqe_state[dir];
 
 		switch (nstate)
 		{
-		case IPF_TCPS_CLOSED: /* 0 */
+		case IPF_TCPS_LISTEN: /* 0 */
 			if ((tcpflags & TH_OPENING) == TH_OPENING) {
 				/*
 				 * 'dir' received an S and sends SA in
@@ -3572,7 +3589,7 @@
 				 */
 				switch (ostate)
 				{
-				case IPF_TCPS_CLOSED :
+				case IPF_TCPS_LISTEN :
 				case IPF_TCPS_SYN_RECEIVED :
 					nstate = IPF_TCPS_HALF_ESTAB;
 					rval = 1;
@@ -3593,11 +3610,7 @@
 			 */
 			break;
 
-		case IPF_TCPS_LISTEN: /* 1 */
-			/* NOT USED */
-			break;
-
-		case IPF_TCPS_SYN_SENT: /* 2 */
+		case IPF_TCPS_SYN_SENT: /* 1 */
 			if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
 				/*
 				 * A retransmitted SYN packet.  We do not reset
@@ -3638,7 +3651,7 @@
 			}
 			break;
 
-		case IPF_TCPS_SYN_RECEIVED: /* 3 */
+		case IPF_TCPS_SYN_RECEIVED: /* 2 */
 			if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
 				/*
 				 * we see an A from 'dir' which was in
@@ -3667,17 +3680,38 @@
 			}
 			break;
 
-		case IPF_TCPS_HALF_ESTAB: /* 4 */
-			if (ostate >= IPF_TCPS_HALF_ESTAB) {
-				if ((tcpflags & TH_ACKMASK) == TH_ACK) {
+		case IPF_TCPS_HALF_ESTAB: /* 3 */
+			if (tcpflags & TH_FIN) {
+				nstate = IPF_TCPS_FIN_WAIT_1;
+				rval = 1;
+			} else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
+				/*
+				 * If we've picked up a connection in mid
+				 * flight, we could be looking at a follow on
+				 * packet from the same direction as the one
+				 * that created this state.  Recognise it but
+				 * do not advance the entire connection's
+				 * state.
+				 */
+				switch (ostate)
+				{
+				case IPF_TCPS_LISTEN :
+				case IPF_TCPS_SYN_SENT :
+				case IPF_TCPS_SYN_RECEIVED :
+					rval = 1;
+					break;
+				case IPF_TCPS_HALF_ESTAB :
+				case IPF_TCPS_ESTABLISHED :
 					nstate = IPF_TCPS_ESTABLISHED;
+					rval = 1;
+					break;
+				default :
+					break;
 				}
 			}
-			rval = 1;
-				
 			break;
 
-		case IPF_TCPS_ESTABLISHED: /* 5 */
+		case IPF_TCPS_ESTABLISHED: /* 4 */
 			rval = 1;
 			if (tcpflags & TH_FIN) {
 				/*
@@ -3685,7 +3719,11 @@
 				 * this gives us a half-closed connection;
 				 * ESTABLISHED -> FIN_WAIT_1
 				 */
-				nstate = IPF_TCPS_FIN_WAIT_1;
+				if (ostate == IPF_TCPS_FIN_WAIT_1) {
+					nstate = IPF_TCPS_CLOSING;
+				} else {
+					nstate = IPF_TCPS_FIN_WAIT_1;
+				}
 			} else if (tcpflags & TH_ACK) {
 				/*
 				 * an ACK, should we exclude other flags here?
@@ -3710,7 +3748,7 @@
 			}
 			break;
 
-		case IPF_TCPS_CLOSE_WAIT: /* 6 */
+		case IPF_TCPS_CLOSE_WAIT: /* 5 */
 			rval = 1;
 			if (tcpflags & TH_FIN) {
 				/*
@@ -3728,7 +3766,7 @@
 			}
 			break;
 
-		case IPF_TCPS_FIN_WAIT_1: /* 7 */
+		case IPF_TCPS_FIN_WAIT_1: /* 6 */
 			rval = 1;
 			if ((tcpflags & TH_ACK) &&
 			    ostate > IPF_TCPS_CLOSE_WAIT) {
@@ -3737,14 +3775,14 @@
 				 * it has sent us a FIN packet that we are
 				 * ack'ing now with an ACK; this means both
 				 * sides have now closed the connection and
-				 * we go into TIME_WAIT
+				 * we go into LAST_ACK
 				 */
 				/*
 				 * XXX: how do we know we really are ACKing
 				 * the FIN packet here? does the window code
 				 * guarantee that?
 				 */
-				nstate = IPF_TCPS_TIME_WAIT;
+				nstate = IPF_TCPS_LAST_ACK;
 			} else {
 				/*
 				 * we closed our side of the connection
@@ -3756,11 +3794,14 @@
 			}
 			break;
 
-		case IPF_TCPS_CLOSING: /* 8 */
-			/* NOT USED */
+		case IPF_TCPS_CLOSING: /* 7 */
+			if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
+				nstate = IPF_TCPS_TIME_WAIT;
+			}
+			rval = 1;
 			break;
 
-		case IPF_TCPS_LAST_ACK: /* 9 */
+		case IPF_TCPS_LAST_ACK: /* 8 */
 			/*
 			 * We want to reset timer here to keep state in table.
 			 * If we would allow the state to time out here, while
@@ -3771,33 +3812,28 @@
 			rval = 1;
 			break;
 
-		case IPF_TCPS_FIN_WAIT_2: /* 10 */
-			rval = 1;
-			if ((tcpflags & TH_OPENING) == TH_OPENING)
-				nstate = IPF_TCPS_SYN_RECEIVED;
-			else if (tcpflags & TH_SYN)
-				nstate = IPF_TCPS_SYN_SENT;
+		case IPF_TCPS_FIN_WAIT_2: /* 9 */
+			/* NOT USED */
 			break;
 
-		case IPF_TCPS_TIME_WAIT: /* 11 */
+		case IPF_TCPS_TIME_WAIT: /* 10 */
 			/* we're in 2MSL timeout now */
-			rval = 1;
+			if (ostate == IPF_TCPS_LAST_ACK) {
+				nstate = IPF_TCPS_CLOSED;
+				rval = 1;
+			} else {
+				rval = 2;
+			}
+			break;
+
+		case IPF_TCPS_CLOSED: /* 11 */
+			rval = 2;
 			break;
 
 		default :
 #if defined(_KERNEL)
-# if SOLARIS
-#  ifdef IPFDEBUG
-			cmn_err(CE_NOTE,
-				"tcp %lx flags %x si %lx nstate %d ostate %d\n",
-				(u_long)tcp, tcpflags, (u_long)tqe,
-				nstate, ostate);
-#  endif
-# else
-			printf("tcp %lx flags %x si %lx nstate %d ostate %d\n",
-				(u_long)tcp, tcpflags, (u_long)tqe,
-				nstate, ostate);
-# endif
+			ASSERT(nstate >= IPF_TCPS_LISTEN &&
+			    nstate <= IPF_TCPS_CLOSED);
 #else
 			abort();
 #endif
@@ -3809,10 +3845,19 @@
 	 * If rval == 2 then do not update the queue position, but treat the
 	 * packet as being ok.
 	 */
-	if (rval == 2)
+	if (rval == 2) {
+		DTRACE_PROBE1(state_keeping_timer, int, nstate);
 		rval = 1;
+	}
 	else if (rval == 1) {
 		tqe->tqe_state[dir] = nstate;
+		/*
+		 * The nstate can either advance to a new state, or remain
+		 * unchanged, resetting the timer by moving to the bottom of
+		 * the queue.
+		 */
+		DTRACE_PROBE1(state_done, int, nstate);
+
 		if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
 			fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs);
 	}
--- a/usr/src/uts/common/inet/ipf/netinet/ip_compat.h	Wed Jun 17 12:00:49 2009 +0800
+++ b/usr/src/uts/common/inet/ipf/netinet/ip_compat.h	Wed Jun 17 10:42:08 2009 +0200
@@ -2403,21 +2403,21 @@
 /*
  * TCP States
  */
-#define IPF_TCPS_CLOSED		0	/* closed */
-#define IPF_TCPS_LISTEN		1	/* listening for connection */
-#define IPF_TCPS_SYN_SENT	2	/* active, have sent syn */
-#define IPF_TCPS_SYN_RECEIVED	3	/* have send and received syn */
-#define IPF_TCPS_HALF_ESTAB	4	/* for connections not fully "up" */
+#define IPF_TCPS_LISTEN		0	/* listening for connection */
+#define IPF_TCPS_SYN_SENT	1	/* active, have sent syn */
+#define IPF_TCPS_SYN_RECEIVED	2	/* have sent and received syn */
+#define IPF_TCPS_HALF_ESTAB	3	/* for connections not fully "up" */
 /* states < IPF_TCPS_ESTABLISHED are those where connections not established */
-#define IPF_TCPS_ESTABLISHED	5	/* established */
-#define IPF_TCPS_CLOSE_WAIT	6	/* rcvd fin, waiting for close */
+#define IPF_TCPS_ESTABLISHED	4	/* established */
+#define IPF_TCPS_CLOSE_WAIT	5	/* rcvd fin, waiting for close */
 /* states > IPF_TCPS_CLOSE_WAIT are those where user has closed */
-#define IPF_TCPS_FIN_WAIT_1	7	/* have closed, sent fin */
-#define IPF_TCPS_CLOSING	8	/* closed xchd FIN; await FIN ACK */
-#define IPF_TCPS_LAST_ACK	9	/* had fin and close; await FIN ACK */
+#define IPF_TCPS_FIN_WAIT_1	6	/* have closed, sent fin */
+#define IPF_TCPS_CLOSING	7	/* closed xchd FIN; await FIN ACK */
+#define IPF_TCPS_LAST_ACK	8	/* had fin and close; await FIN ACK */
 /* states > IPF_TCPS_CLOSE_WAIT && < IPF_TCPS_FIN_WAIT_2 await ACK of FIN */
-#define IPF_TCPS_FIN_WAIT_2	10	/* have closed, fin is acked */
-#define IPF_TCPS_TIME_WAIT	11	/* in 2*msl quiet wait after close */
+#define IPF_TCPS_FIN_WAIT_2	9	/* have closed, fin is acked */
+#define IPF_TCPS_TIME_WAIT	10	/* in 2*msl quiet wait after close */
+#define IPF_TCPS_CLOSED		11	/* closed */
 #define IPF_TCP_NSTATES		12
 
 #define	TCP_MSL			120