changeset 22964:7f4439b04050

lib-master: If connect() to backend UNIX socket is retried, log a warning For example if imap-login process needs to retry before it successfully connects to imap process's socket, a warning is logged. This warning is important because it means that the imap-login process may have been sleeping up to 0.5 seconds and causing all the other connections to hang during it. It would be better to make this retrying asynchronous, but before spending time on doing that, lets see if this warning is ever even being logged.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Fri, 13 Apr 2018 16:14:18 +0300
parents 09248aab2b42
children a382709de5e8
files src/lib-master/master-auth.c
diffstat 1 files changed, 25 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-master/master-auth.c	Fri Apr 13 16:12:27 2018 +0300
+++ b/src/lib-master/master-auth.c	Fri Apr 13 16:14:18 2018 +0300
@@ -5,6 +5,7 @@
 #include "fdpass.h"
 #include "buffer.h"
 #include "hash.h"
+#include "time-util.h"
 #include "master-service-private.h"
 #include "master-auth.h"
 
@@ -12,6 +13,7 @@
 #include <sys/stat.h>
 
 #define SOCKET_CONNECT_RETRY_MSECS 500
+#define SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS 2
 #define MASTER_AUTH_REQUEST_TIMEOUT_MSECS (MASTER_LOGIN_TIMEOUT_SECS/2*1000)
 
 struct master_auth_connection {
@@ -35,6 +37,7 @@
 	pool_t pool;
 
 	const char *default_path;
+	time_t last_connect_warning;
 
 	unsigned int tag_counter;
 	HASH_TABLE(void *, struct master_auth_connection *) connections;
@@ -185,8 +188,28 @@
 	buffer_append(buf, &req, sizeof(req));
 	buffer_append(buf, params->data, req.data_size);
 
-	conn->fd = net_connect_unix_with_retries(conn->path,
-						 SOCKET_CONNECT_RETRY_MSECS);
+	conn->fd = net_connect_unix(conn->path);
+	if (conn->fd == -1 && errno == EAGAIN) {
+		/* Couldn't connect to the socket immediately. This will add
+		   a delay that causes hangs to the whole process, which won't
+		   be obvious unless we log a warning. FIXME: The wait could
+		   be asynchronous. */
+		struct timeval start_time;
+
+		io_loop_time_refresh();
+		start_time = ioloop_timeval;
+		conn->fd = net_connect_unix_with_retries(conn->path,
+			SOCKET_CONNECT_RETRY_MSECS);
+		io_loop_time_refresh();
+		if (conn->fd != -1 &&
+		    ioloop_time - auth->last_connect_warning >=
+		    SOCKET_CONNECT_RETRY_WARNING_INTERVAL_SECS) {
+			i_warning("net_connect_unix(%s) succeeded only after retrying - "
+				  "took %lld us", conn->path,
+				  timeval_diff_usecs(&ioloop_timeval, &start_time));
+			auth->last_connect_warning = ioloop_time;
+		}
+	}
 	if (conn->fd == -1) {
 		i_error("net_connect_unix(%s) failed: %m%s",
 			conn->path, errno != EAGAIN ? "" :