changeset 20583:c4cc24c77ad8

director: Throttle user move/kill failure errors after 100/sec. If these failures happen to a lot of users, the error messages can take up a lot of space in the logs.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Tue, 02 Aug 2016 23:25:57 +0300
parents 3e02c55136a6
children 6ff9cfbbdf4c
files src/director/director.c src/director/director.h src/director/main.c
diffstat 3 files changed, 51 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/src/director/director.c	Tue Aug 02 23:14:23 2016 +0300
+++ b/src/director/director.c	Tue Aug 02 23:25:57 2016 +0300
@@ -5,6 +5,7 @@
 #include "array.h"
 #include "str.h"
 #include "strescape.h"
+#include "log-throttle.h"
 #include "ipc-client.h"
 #include "user-directory.h"
 #include "mail-host.h"
@@ -24,6 +25,14 @@
 
 bool director_debug;
 
+static struct log_throttle *user_move_throttle;
+static struct log_throttle *user_kill_fail_throttle;
+
+static const struct log_throttle_settings director_log_throttle_settings = {
+	.throttle_at_max_per_interval = 100,
+	.unthrottle_at_max_per_interval = 2,
+};
+
 static bool director_is_self_ip_set(struct director *dir)
 {
 	struct ip_addr ip;
@@ -744,6 +753,12 @@
 	}
 }
 
+static void director_user_kill_fail_throttled(unsigned int new_events_count,
+					      void *context ATTR_UNUSED)
+{
+	i_error("Failed to kill %u users' connections", new_events_count);
+}
+
 static void director_kill_user_callback(enum ipc_client_cmd_state state,
 					const char *data, void *context)
 {
@@ -760,8 +775,10 @@
 	case IPC_CLIENT_CMD_STATE_OK:
 		break;
 	case IPC_CLIENT_CMD_STATE_ERROR:
-		i_error("Failed to kill user %u connections: %s",
-			ctx->username_hash, data);
+		if (log_throttle_accept(user_kill_fail_throttle)) {
+			i_error("Failed to kill user %u connections: %s",
+				ctx->username_hash, data);
+		}
 		/* we can't really do anything but continue anyway */
 		break;
 	}
@@ -779,12 +796,21 @@
 	i_free(ctx);
 }
 
+static void director_user_move_throttled(unsigned int new_events_count,
+					 void *context ATTR_UNUSED)
+{
+	i_error("%u users' move timed out, their state may now be inconsistent",
+		new_events_count);
+}
+
 static void director_user_move_timeout(struct user *user)
 {
 	i_assert(user->kill_state != USER_KILL_STATE_DELAY);
 
-	i_error("Finishing user %u move timed out, "
-		"its state may now be inconsistent", user->username_hash);
+	if (log_throttle_accept(user_move_throttle)) {
+		i_error("Finishing user %u move timed out, "
+			"its state may now be inconsistent", user->username_hash);
+	}
 
 	user->kill_state = USER_KILL_STATE_NONE;
 	timeout_remove(&user->to_move);
@@ -1067,3 +1093,19 @@
 	} T_END;
 	va_end(args);
 }
+
+void directors_init(void)
+{
+	user_move_throttle =
+		log_throttle_init(&director_log_throttle_settings,
+				  director_user_move_throttled, NULL);
+	user_kill_fail_throttle =
+		log_throttle_init(&director_log_throttle_settings,
+				  director_user_kill_fail_throttled, NULL);
+}
+
+void directors_deinit(void)
+{
+	log_throttle_deinit(&user_move_throttle);
+	log_throttle_deinit(&user_kill_fail_throttle);
+}
--- a/src/director/director.h	Tue Aug 02 23:14:23 2016 +0300
+++ b/src/director/director.h	Tue Aug 02 23:25:57 2016 +0300
@@ -175,6 +175,9 @@
 
 int director_connect_host(struct director *dir, struct director_host *host);
 
+void directors_init(void);
+void directors_deinit(void);
+
 void dir_debug(const char *fmt, ...) ATTR_FORMAT(1, 2);
 
 #endif
--- a/src/director/main.c	Tue Aug 02 23:14:23 2016 +0300
+++ b/src/director/main.c	Tue Aug 02 23:25:57 2016 +0300
@@ -258,6 +258,7 @@
 			"(for standalone keep director_servers empty)");
 	}
 
+	directors_init();
 	director = director_init(set, &listen_ip, listen_port,
 				 director_state_changed);
 	director_host_add_from_string(director, set->director_servers);
@@ -278,6 +279,7 @@
 	if (notify_conn != NULL)
 		notify_connection_deinit(&notify_conn);
 	director_deinit(&director);
+	directors_deinit();
 	doveadm_connections_deinit();
 	login_connections_deinit();
 	auth_connections_deinit();