changeset 25428:7f8195cca704

13149 elide squeue wake-ups when prudent Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Ryan Zezeski <ryan.zeseski@joyent.com> Approved by: Dan McDonald <danmcd@joyent.com>
author Patrick Mooney <pmooney@pfmooney.com>
date Thu, 14 Sep 2017 14:16:58 +0000
parents a5e7d71ed1b5
children dcc78dc3a8c8
files usr/src/uts/common/inet/squeue.c usr/src/uts/common/inet/tcp/tcp_socket.c usr/src/uts/common/sys/squeue.h
diffstat 3 files changed, 145 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/usr/src/uts/common/inet/squeue.c	Tue Sep 15 10:14:55 2020 +0100
+++ b/usr/src/uts/common/inet/squeue.c	Thu Sep 14 14:16:58 2017 +0000
@@ -136,6 +136,7 @@
 static void squeue_worker(squeue_t *sqp);
 static void squeue_polling_thread(squeue_t *sqp);
 static void squeue_worker_wakeup(squeue_t *sqp);
+static void squeue_try_drain_one(squeue_t *, conn_t *);
 
 kmem_cache_t *squeue_cache;
 
@@ -402,6 +403,15 @@
 			if (sqp->sq_first == NULL ||
 			    process_flag == SQ_NODRAIN) {
 				/*
+				 * Even if SQ_NODRAIN was specified, it may
+				 * still be best to process a single queued
+				 * item if it matches the active connection.
+				 */
+				if (sqp->sq_first != NULL) {
+					squeue_try_drain_one(sqp, connp);
+				}
+
+				/*
 				 * If work or control actions are pending, wake
 				 * up the worker thread.
 				 */
@@ -589,11 +599,11 @@
 squeue_drain(squeue_t *sqp, uint_t proc_type, hrtime_t expire)
 {
 	mblk_t		*mp;
-	mblk_t 		*head;
-	sqproc_t 	proc;
+	mblk_t		*head;
+	sqproc_t	proc;
 	conn_t		*connp;
 	ill_rx_ring_t	*sq_rx_ring = sqp->sq_rx_ring;
-	hrtime_t 	now;
+	hrtime_t	now;
 	boolean_t	sq_poll_capable;
 	ip_recv_attr_t	*ira, iras;
 
@@ -1347,32 +1357,104 @@
 	}
 }
 
+/*
+ * If possible, attempt to immediately process a single queued request, should
+ * it match the supplied conn_t reference.  This is primarily intended to elide
+ * squeue worker thread wake-ups during local TCP connect() or close()
+ * operations where the response is placed on the squeue during processing.
+ * The caller must hold sq_lock, which is dropped while the item is processed
+ * and re-acquired before returning.
+ */
+static void
+squeue_try_drain_one(squeue_t *sqp, conn_t *compare_conn)
+{
+	mblk_t *next, *mp = sqp->sq_first;
+	conn_t *connp;
+	sqproc_t proc;
+	ip_recv_attr_t iras, *ira = NULL;
+
+	ASSERT(MUTEX_HELD(&sqp->sq_lock));
+	ASSERT((sqp->sq_state & SQS_PROC) == 0);
+	ASSERT(sqp->sq_run == NULL);
+	VERIFY(mp != NULL);
+
+	/*
+	 * There is no guarantee that compare_conn references a valid object at
+	 * this time, so under no circumstance may it be dereferenced unless it
+	 * matches the squeue entry.
+	 */
+	connp = (conn_t *)mp->b_prev;
+	if (connp != compare_conn) {
+		return;
+	}
+
+	next = mp->b_next;
+	proc = (sqproc_t)mp->b_queue;
+
+	ASSERT(proc != NULL);
+	ASSERT(sqp->sq_count > 0);
+
+	/* Dequeue the head item; clear sq_last too if it was the only one */
+	sqp->sq_first = next;
+	if (next == NULL) {
+		sqp->sq_last = NULL;
+	}
+	sqp->sq_count--;
+
+	sqp->sq_state |= SQS_PROC;
+	sqp->sq_run = curthread;
+	mutex_exit(&sqp->sq_lock);
+
+	/* Prep mblk_t and retrieve ira if needed */
+	mp->b_prev = NULL;
+	mp->b_queue = NULL;
+	mp->b_next = NULL;
+	if (ip_recv_attr_is_mblk(mp)) {
+		mblk_t	*attrmp = mp;
+
+		ASSERT(attrmp->b_cont != NULL);
+
+		mp = attrmp->b_cont;
+		attrmp->b_cont = NULL;
+
+		ASSERT(mp->b_queue == NULL);
+		ASSERT(mp->b_prev == NULL);
+
+		if (!ip_recv_attr_from_mblk(attrmp, &iras)) {
+			/* ill_t or ip_stack_t disappeared */
+			ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
+			ira_cleanup(&iras, B_TRUE);
+			CONN_DEC_REF(connp);
+			goto done;
+		}
+		ira = &iras;
+	}
+
+	SQUEUE_DBG_SET(sqp, mp, proc, connp, mp->b_tag);
+	connp->conn_on_sqp = B_TRUE;
+	DTRACE_PROBE3(squeue__proc__start, squeue_t *, sqp, mblk_t *, mp,
+	    conn_t *, connp);
+	(*proc)(connp, mp, sqp, ira);
+	DTRACE_PROBE2(squeue__proc__end, squeue_t *, sqp, conn_t *, connp);
+	connp->conn_on_sqp = B_FALSE;
+	CONN_DEC_REF(connp);
+	SQUEUE_DBG_CLEAR(sqp);
+
+done:
+	mutex_enter(&sqp->sq_lock);
+	sqp->sq_state &= ~(SQS_PROC);
+	sqp->sq_run = NULL;
+}
+
 void
-squeue_synch_exit(conn_t *connp)
+squeue_synch_exit(conn_t *connp, int flag)
 {
 	squeue_t *sqp = connp->conn_sqp;
 
-	mutex_enter(&sqp->sq_lock);
-	if (sqp->sq_run == curthread) {
-		ASSERT(sqp->sq_state & SQS_PROC);
-
-		sqp->sq_state &= ~SQS_PROC;
-		sqp->sq_run = NULL;
-		connp->conn_on_sqp = B_FALSE;
+	ASSERT(flag == SQ_NODRAIN || flag == SQ_PROCESS);
 
-		if (sqp->sq_first != NULL) {
-			/*
-			 * If this was a normal thread, then it would
-			 * (most likely) continue processing the pending
-			 * requests. Since the just completed operation
-			 * was executed synchronously, the thread should
-			 * not be delayed. To compensate, wake up the
-			 * worker thread right away when there are outstanding
-			 * requests.
-			 */
-			squeue_worker_wakeup(sqp);
-		}
-	} else {
+	mutex_enter(&sqp->sq_lock);
+	if (sqp->sq_run != curthread) {
 		/*
 		 * The caller doesn't own the squeue, clear the SQS_PAUSE flag,
 		 * and wake up the squeue owner, such that owner can continue
@@ -1383,6 +1465,24 @@
 
 		/* There should be only one thread blocking on sq_synch_cv. */
 		cv_signal(&sqp->sq_synch_cv);
+		mutex_exit(&sqp->sq_lock);
+		return;
+	}
+
+	ASSERT(sqp->sq_state & SQS_PROC);
+
+	sqp->sq_state &= ~SQS_PROC;
+	sqp->sq_run = NULL;
+	connp->conn_on_sqp = B_FALSE;
+
+	/* If the caller opted in, attempt to process the head squeue item. */
+	if (flag == SQ_PROCESS && sqp->sq_first != NULL) {
+		squeue_try_drain_one(sqp, connp);
+	}
+
+	/* Wake up the worker if further requests are pending. */
+	if (sqp->sq_first != NULL) {
+		squeue_worker_wakeup(sqp);
 	}
 	mutex_exit(&sqp->sq_lock);
 }
--- a/usr/src/uts/common/inet/tcp/tcp_socket.c	Tue Sep 15 10:14:55 2020 +0100
+++ b/usr/src/uts/common/inet/tcp/tcp_socket.c	Thu Sep 14 14:16:58 2017 +0000
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
  */
 
 /* This file contains all TCP kernel socket related functions. */
@@ -198,7 +199,7 @@
 tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
     socklen_t len, cred_t *cr)
 {
-	int 		error;
+	int		error;
 	conn_t		*connp = (conn_t *)proto_handle;
 
 	/* All Solaris components should pass a cred for this operation. */
@@ -221,7 +222,7 @@
 		error = tcp_do_bind(connp, sa, len, cr, B_TRUE);
 	}
 
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 
 	if (error < 0) {
 		if (error == -TOUTSTATE)
@@ -239,7 +240,7 @@
 {
 	conn_t	*connp = (conn_t *)proto_handle;
 	tcp_t	*tcp = connp->conn_tcp;
-	int 	error;
+	int	error;
 
 	ASSERT(connp->conn_upper_handle != NULL);
 
@@ -268,7 +269,7 @@
 		else
 			error = proto_tlitosyserr(-error);
 	}
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 	return (error);
 }
 
@@ -332,7 +333,13 @@
 		    connp->conn_upper_handle, &sopp);
 	}
 done:
-	squeue_synch_exit(connp);
+	/*
+	 * Indicate (via SQ_PROCESS) that it is acceptable for the squeue to
+	 * attempt to drain a pending request relevant to this connection when
+	 * exiting the synchronous context.  This can improve the performance
+	 * and efficiency of TCP connect(2) operations to localhost.
+	 */
+	squeue_synch_exit(connp, SQ_PROCESS);
 
 	return ((error == 0) ? EINPROGRESS : error);
 }
@@ -401,7 +408,7 @@
 	}
 
 	len = tcp_opt_get(connp, level, option_name, optvalp_buf);
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 
 	if (len == -1) {
 		kmem_free(optvalp_buf, max_optbuf_len);
@@ -462,14 +469,14 @@
 		if (error < 0) {
 			error = proto_tlitosyserr(-error);
 		}
-		squeue_synch_exit(connp);
+		squeue_synch_exit(connp, SQ_NODRAIN);
 		return (error);
 	}
 
 	error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
 	    NULL, cr);
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 
 	ASSERT(error >= 0);
 
@@ -645,7 +652,7 @@
 		}
 	}
 
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 }
 
 /* ARGSUSED */
@@ -653,7 +660,7 @@
 tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
     int mode, int32_t *rvalp, cred_t *cr)
 {
-	conn_t  	*connp = (conn_t *)proto_handle;
+	conn_t		*connp = (conn_t *)proto_handle;
 	int		error;
 
 	ASSERT(connp->conn_upper_handle != NULL);
@@ -818,7 +825,7 @@
 	struct stroptions	*stropt;
 	struct T_capability_ack tca;
 	struct sockaddr_in6	laddr, faddr;
-	socklen_t 		laddrlen, faddrlen;
+	socklen_t		laddrlen, faddrlen;
 	short			opts;
 	int			error;
 	mblk_t			*mp, *mpnext;
@@ -992,7 +999,7 @@
     sock_quiesce_arg_t *arg)
 {
 	tcp_t			*tcp;
-	conn_t 			*connp = (conn_t *)proto_handle;
+	conn_t			*connp = (conn_t *)proto_handle;
 	int			error;
 	mblk_t			*stropt_mp;
 	mblk_t			*ordrel_mp;
@@ -1051,7 +1058,7 @@
 	 * There should be atleast two ref's (IP + TCP)
 	 */
 	ASSERT(connp->conn_ref >= 2);
-	squeue_synch_exit(connp);
+	squeue_synch_exit(connp, SQ_NODRAIN);
 
 	return (0);
 }
--- a/usr/src/uts/common/sys/squeue.h	Tue Sep 15 10:14:55 2020 +0100
+++ b/usr/src/uts/common/sys/squeue.h	Thu Sep 14 14:16:58 2017 +0000
@@ -86,7 +86,7 @@
 
 struct conn_s;
 extern int squeue_synch_enter(struct conn_s *, mblk_t *);
-extern void squeue_synch_exit(struct conn_s *);
+extern void squeue_synch_exit(struct conn_s *, int);
 
 #ifdef	__cplusplus
 }