changeset 13958:7175320feafc

auth: Throttle creation of SQL auth worker processes if they can't connect to the database.
author Timo Sirainen <tss@iki.fi>
date Thu, 19 Jan 2012 17:46:52 +0200
parents 1fa75cada826
children 3dae51fd4565
files src/auth/auth-settings.c src/auth/auth-worker-client.c src/auth/auth-worker-client.h src/auth/auth-worker-server.c src/auth/db-sql.c src/auth/db-sql.h src/auth/main.c src/auth/passdb-sql.c src/auth/userdb-sql.c
diffstat 9 files changed, 149 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/src/auth/auth-settings.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-settings.c	Thu Jan 19 17:46:52 2012 +0200
@@ -282,6 +282,11 @@
 	if (set->debug)
 		set->verbose = TRUE;
 
+	if (set->worker_max_count == 0) {
+		*error_r = "auth_worker_max_count must be above zero";
+		return FALSE;
+	}
+
 	if (set->cache_size > 0 && set->cache_size < 1024) {
 		/* probably a configuration error.
 		   older versions used megabyte numbers */
--- a/src/auth/auth-worker-client.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-client.c	Thu Jan 19 17:46:52 2012 +0200
@@ -8,6 +8,7 @@
 #include "ostream.h"
 #include "hex-binary.h"
 #include "str.h"
+#include "process-title.h"
 #include "master-service.h"
 #include "auth-request.h"
 #include "auth-worker-client.h"
@@ -30,6 +31,7 @@
 
 	unsigned int version_received:1;
 	unsigned int dbhash_received:1;
+	unsigned int error_sent:1;
 };
 
 struct auth_worker_list_context {
@@ -40,10 +42,23 @@
 };
 
 struct auth_worker_client *auth_worker_client;
+static bool auth_worker_client_error = FALSE;
 
 static void auth_worker_input(struct auth_worker_client *client);
 static int auth_worker_output(struct auth_worker_client *client);
 
+void auth_worker_refresh_proctitle(const char *state)
+{
+	if (!global_auth_settings->verbose_proctitle || !worker)
+		return;
+
+	if (auth_worker_client_error)
+		state = "error";
+	else if (auth_worker_client == NULL)
+		state = "waiting for connection";
+	process_title_set(t_strdup_printf("worker: %s", state));
+}
+
 static void
 auth_worker_client_check_throttle(struct auth_worker_client *client)
 {
@@ -673,6 +688,8 @@
 	auth_worker_refresh_proctitle(CLIENT_STATE_HANDSHAKE);
 
 	auth_worker_client = client;
+	if (auth_worker_client_error)
+		auth_worker_client_send_error();
 	return client;
 }
 
@@ -694,8 +711,8 @@
 	client->fd = -1;
 	auth_worker_client_unref(&client);
 
-	auth_worker_refresh_proctitle(NULL);
 	auth_worker_client = NULL;
+	auth_worker_refresh_proctitle("");
 	master_service_client_connection_destroyed(master_service);
 }
 
@@ -712,3 +729,25 @@
 	o_stream_unref(&client->output);
 	i_free(client);
 }
+
+void auth_worker_client_send_error(void)
+{
+	auth_worker_client_error = TRUE;
+	if (auth_worker_client != NULL &&
+	    !auth_worker_client->error_sent) {
+		o_stream_send_str(auth_worker_client->output, "ERROR\n");
+		auth_worker_client->error_sent = TRUE;
+	}
+	auth_worker_refresh_proctitle("");
+}
+
+void auth_worker_client_send_success(void)
+{
+	auth_worker_client_error = FALSE;
+	if (auth_worker_client != NULL &&
+	    auth_worker_client->error_sent) {
+		o_stream_send_str(auth_worker_client->output, "SUCCESS\n");
+		auth_worker_client->error_sent = FALSE;
+	}
+	auth_worker_refresh_proctitle(CLIENT_STATE_IDLE);
+}
--- a/src/auth/auth-worker-client.h	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-client.h	Thu Jan 19 17:46:52 2012 +0200
@@ -11,6 +11,9 @@
 void auth_worker_client_destroy(struct auth_worker_client **client);
 void auth_worker_client_unref(struct auth_worker_client **client);
 
+void auth_worker_client_send_error(void);
+void auth_worker_client_send_success(void);
+
 const char *auth_worker_client_get_state(struct auth_worker_client *client);
 
 #endif
--- a/src/auth/auth-worker-server.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/auth-worker-server.c	Thu Jan 19 17:46:52 2012 +0200
@@ -41,14 +41,16 @@
 	struct auth_worker_request *request;
 	unsigned int id_counter;
 
+	unsigned int received_error:1;
 	unsigned int shutdown:1;
 };
 
 static ARRAY_DEFINE(connections, struct auth_worker_connection *) = ARRAY_INIT;
-static unsigned int idle_count;
+static unsigned int idle_count = 0, auth_workers_with_errors = 0;
 static ARRAY_DEFINE(worker_request_array, struct auth_worker_request *);
 static struct aqueue *worker_request_queue;
 static time_t auth_worker_last_warn;
+static unsigned int auth_workers_throttle_count;
 
 static const char *worker_socket_path;
 
@@ -150,7 +152,7 @@
 	struct auth_worker_connection *conn;
 	int fd;
 
-	if (array_count(&connections) >= global_auth_settings->worker_max_count)
+	if (array_count(&connections) >= auth_workers_throttle_count)
 		return NULL;
 
 	fd = net_connect_unix_with_retries(worker_socket_path, 5000);
@@ -189,6 +191,12 @@
 
 	*_conn = NULL;
 
+	if (conn->received_error) {
+		i_assert(auth_workers_with_errors > 0);
+		i_assert(auth_workers_with_errors <= array_count(&connections));
+		auth_workers_with_errors--;
+	}
+
 	array_foreach(&connections, conns) {
 		if (*conns == conn) {
 			idx = array_foreach_idx(&connections, conns);
@@ -260,6 +268,51 @@
 		io_remove(&conn->io);
 }
 
+static bool auth_worker_error(struct auth_worker_connection *conn)
+{
+	if (conn->received_error)
+		return TRUE;
+	conn->received_error = TRUE;
+	auth_workers_with_errors++;
+	i_assert(auth_workers_with_errors <= array_count(&connections));
+
+	if (auth_workers_with_errors == 1) {
+		/* this is the only failing auth worker connection.
+		   don't create new ones until this one sends SUCCESS. */
+		auth_workers_throttle_count = array_count(&connections);
+		return TRUE;
+	}
+
+	/* several workers are failing: lower the limit and close this one */
+	i_assert(array_count(&connections) > 1);
+	if (auth_workers_throttle_count >= array_count(&connections))
+		auth_workers_throttle_count = array_count(&connections)-1;
+	else if (auth_workers_throttle_count > 1)
+		auth_workers_throttle_count--;
+	auth_worker_destroy(&conn, "Internal auth worker failure", FALSE);
+	return FALSE;
+}
+
+static void auth_worker_success(struct auth_worker_connection *conn)
+{
+	unsigned int max_count = global_auth_settings->worker_max_count;
+
+	if (!conn->received_error)
+		return;
+
+	i_assert(auth_workers_with_errors > 0);
+	i_assert(auth_workers_with_errors <= array_count(&connections));
+	auth_workers_with_errors--;
+
+	if (auth_workers_with_errors == 0) {
+		/* all workers are succeeding now, set the limit back to
+		   original. */
+		auth_workers_throttle_count = max_count;
+	} else if (auth_workers_throttle_count < max_count)
+		auth_workers_throttle_count++;
+	conn->received_error = FALSE;
+}
+
 static void worker_input(struct auth_worker_connection *conn)
 {
 	const char *line, *id_str;
@@ -286,6 +339,15 @@
 			conn->shutdown = TRUE;
 			continue;
 		}
+		if (strcmp(line, "ERROR") == 0) {
+			if (!auth_worker_error(conn))
+				return;
+			continue;
+		}
+		if (strcmp(line, "SUCCESS") == 0) {
+			auth_worker_success(conn);
+			continue;
+		}
 		id_str = line;
 		line = strchr(line, '\t');
 		if (line == NULL ||
@@ -358,6 +420,8 @@
 void auth_worker_server_init(void)
 {
 	worker_socket_path = "auth-worker";
+	auth_workers_throttle_count = global_auth_settings->worker_max_count;
+	i_assert(auth_workers_throttle_count > 0);
 
 	i_array_init(&worker_request_array, 128);
 	worker_request_queue = aqueue_init(&worker_request_array.arr);
--- a/src/auth/db-sql.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/db-sql.c	Thu Jan 19 17:46:52 2012 +0200
@@ -6,6 +6,7 @@
 
 #include "settings.h"
 #include "auth-request.h"
+#include "auth-worker-client.h"
 #include "db-sql.h"
 
 #include <stddef.h>
@@ -129,6 +130,25 @@
 	pool_unref(&conn->pool);
 }
 
+void db_sql_connect(struct sql_connection *conn)
+{
+	if (sql_connect(conn->db) < 0 && worker) {
+		/* auth worker's sql connection failed. we can't do anything
+		   useful until the connection works. there's no point in
+		   having tons of worker processes all logging failures,
+		   so tell the auth master to stop creating new workers (and
+		   maybe close old ones). this handling is especially useful if
+		   we reach the max. number of connections for sql server. */
+		auth_worker_client_send_error();
+	}
+}
+
+void db_sql_success(struct sql_connection *conn ATTR_UNUSED)
+{
+	if (worker)
+		auth_worker_client_send_success();
+}
+
 void db_sql_check_userdb_warning(struct sql_connection *conn)
 {
 	if (worker || conn->userdb_used || conn->set.userdb_warning_disable)
--- a/src/auth/db-sql.h	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/db-sql.h	Thu Jan 19 17:46:52 2012 +0200
@@ -34,6 +34,9 @@
 struct sql_connection *db_sql_init(const char *config_path, bool userdb);
 void db_sql_unref(struct sql_connection **conn);
 
+void db_sql_connect(struct sql_connection *conn);
+void db_sql_success(struct sql_connection *conn);
+
 void db_sql_check_userdb_warning(struct sql_connection *conn);
 
 #endif
--- a/src/auth/main.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/main.c	Thu Jan 19 17:46:52 2012 +0200
@@ -71,16 +71,6 @@
 		auth_request_state_count[AUTH_REQUEST_STATE_USERDB]));
 }
 
-void auth_worker_refresh_proctitle(const char *state)
-{
-	if (!global_auth_settings->verbose_proctitle || !worker)
-		return;
-
-	if (state == NULL)
-		state = "waiting for connection";
-	process_title_set(t_strdup_printf("worker: %s", state));
-}
-
 static const char *const *read_global_settings(void)
 {
 	struct master_service_settings_output set_output;
@@ -235,6 +225,10 @@
 	lib_signals_ignore(SIGHUP, TRUE);
 	lib_signals_ignore(SIGUSR2, TRUE);
 
+	/* set proctitles before init()s, since they may set them to error */
+	auth_refresh_proctitle();
+	auth_worker_refresh_proctitle(NULL);
+
 	child_wait_init();
 	auth_worker_server_init();
 	auths_init();
@@ -248,8 +242,6 @@
 		/* caching is handled only by the main auth process */
 		passdb_cache_init(global_auth_settings);
 	}
-	auth_refresh_proctitle();
-	auth_worker_refresh_proctitle(NULL);
 }
 
 static void main_deinit(void)
--- a/src/auth/passdb-sql.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/passdb-sql.c	Thu Jan 19 17:46:52 2012 +0200
@@ -66,6 +66,8 @@
 	password = NULL;
 
 	ret = sql_result_next_row(result);
+	if (ret >= 0)
+		db_sql_success(module->conn);
 	if (ret < 0) {
 		if (!module->conn->default_password_query) {
 			auth_request_log_error(auth_request, "sql",
@@ -269,7 +271,7 @@
 	module->module.blocking = (flags & SQL_DB_FLAG_BLOCKING) != 0;
 
 	if (!module->module.blocking || worker)
-		sql_connect(module->conn->db);
+		db_sql_connect(module->conn);
 	db_sql_check_userdb_warning(module->conn);
 }
 
--- a/src/auth/userdb-sql.c	Thu Jan 19 16:33:13 2012 +0200
+++ b/src/auth/userdb-sql.c	Thu Jan 19 17:46:52 2012 +0200
@@ -67,6 +67,8 @@
 	int ret;
 
 	ret = sql_result_next_row(sql_result);
+	if (ret >= 0)
+		db_sql_success(module->conn);
 	if (ret < 0) {
 		if (!module->conn->default_user_query) {
 			auth_request_log_error(auth_request, "sql",
@@ -212,6 +214,8 @@
 	}
 
 	ret = sql_result_next_row(ctx->result);
+	if (ret >= 0)
+		db_sql_success(module->conn);
 	if (ret > 0) {
 		if (userdb_sql_iterate_get_user(ctx, &user) < 0)
 			i_error("sql: Iterate query didn't return 'user' field");
@@ -278,7 +282,7 @@
 	_module->blocking = (flags & SQL_DB_FLAG_BLOCKING) != 0;
 
 	if (!_module->blocking || worker)
-		sql_connect(module->conn->db);
+		db_sql_connect(module->conn);
 }
 
 static void userdb_sql_deinit(struct userdb_module *_module)