changeset 18729:0ee3e734249a

director: Added "up" vs "down" states and doveadm director up/down commands. These commands are intended to be used by automated watchdogs that detect whether backends are up or down. This way the vhost count doesn't get forgotten after a server goes down. It also means that an admin can manually take down a server by setting its vhost count to 0 without the watchdog automatically bringing it back up.
author Timo Sirainen <tss@iki.fi>
date Mon, 18 May 2015 07:49:15 -0400
parents 0d815b4db957
children 45013c8cf69c
files src/director/director-connection.c src/director/director.c src/director/director.h src/director/doveadm-connection.c src/director/mail-host.c src/director/mail-host.h src/doveadm/doveadm-director.c
diffstat 7 files changed, 142 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/src/director/director-connection.c	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/director-connection.c	Mon May 18 07:49:15 2015 -0400
@@ -846,7 +846,8 @@
 	struct ip_addr ip;
 	const char *tag = "";
 	unsigned int vhost_count;
-	bool update;
+	bool update, down = FALSE;
+	time_t last_updown_change = 0;
 
 	if (str_array_length(args) < 2 ||
 	    net_addr2ip(args[0], &ip) < 0 ||
@@ -854,8 +855,17 @@
 		director_cmd_error(conn, "Invalid parameters");
 		return FALSE;
 	}
-	if (args[2] != NULL)
+	if (args[2] != NULL) {
 		tag = args[2];
+		if (args[3] != NULL) {
+			if ((args[3][0] != 'D' && args[3][0] != 'U') ||
+			    str_to_time(args[3]+1, &last_updown_change) < 0) {
+				director_cmd_error(conn, "Invalid updown parameters");
+				return FALSE;
+			}
+			down = args[3][0] == 'D';
+		}
+	}
 	if (conn->ignore_host_events) {
 		/* remote is sending hosts in a handshake, but it doesn't have
 		   a completed ring and we do. */
@@ -868,7 +878,10 @@
 		host = mail_host_add_ip(conn->dir->mail_hosts, &ip, tag);
 		update = TRUE;
 	} else {
-		update = host->vhost_count != vhost_count;
+		update = host->vhost_count != vhost_count ||
+			host->down != down ||
+			host->last_updown_change != last_updown_change;
+;
 		if (strcmp(tag, host->tag) != 0) {
 			i_error("director(%s): Host %s changed tag from '%s' to '%s'",
 				conn->name, net_ip2addr(&host->ip),
@@ -879,6 +892,8 @@
 	}
 
 	if (update) {
+		mail_host_set_down(conn->dir->mail_hosts, host,
+				   down, last_updown_change);
 		mail_host_set_vhost_count(conn->dir->mail_hosts,
 					  host, vhost_count);
 		director_update_host(conn->dir, conn->host, dir_host, host);
@@ -1581,14 +1596,23 @@
 director_connection_send_hosts(struct director_connection *conn, string_t *str)
 {
 	struct mail_host *const *hostp;
+	bool send_updowns;
+
+	send_updowns = conn->minor_version >= DIRECTOR_VERSION_UPDOWN;
 
 	str_printfa(str, "HOST-HAND-START\t%u\n", conn->dir->ring_handshaked);
 	array_foreach(mail_hosts_get(conn->dir->mail_hosts), hostp) {
+		struct mail_host *host = *hostp;
+
 		str_printfa(str, "HOST\t%s\t%u",
-			    net_ip2addr(&(*hostp)->ip), (*hostp)->vhost_count);
-		if ((*hostp)->tag[0] != '\0') {
+			    net_ip2addr(&host->ip), host->vhost_count);
+		if (host->tag[0] != '\0' || send_updowns) {
 			str_append_c(str, '\t');
-			str_append_tabescaped(str, (*hostp)->tag);
+			str_append_tabescaped(str, host->tag);
+		}
+		if (send_updowns) {
+			str_printfa(str, "\t%c%ld", host->down ? 'D' : 'U',
+				    (long)host->last_updown_change);
 		}
 		str_append_c(str, '\n');
 	}
--- a/src/director/director.c	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/director.c	Mon May 18 07:49:15 2015 -0400
@@ -533,17 +533,19 @@
 		    net_ip2addr(&orig_src->ip), orig_src->port,
 		    orig_src->last_seq,
 		    net_ip2addr(&host->ip), host->vhost_count);
-	if (host->tag[0] == '\0')
-		;
-	else if (dir->ring_handshaked &&
-		 dir->ring_min_version < DIRECTOR_VERSION_TAGS) {
+	if (dir->ring_min_version >= DIRECTOR_VERSION_TAGS) {
+		str_append_c(str, '\t');
+		str_append_tabescaped(str, host->tag);
+	} else if (host->tag[0] != '\0' &&
+		   dir->ring_min_version < DIRECTOR_VERSION_TAGS) {
 		i_error("Ring has directors that don't support tags - removing host %s with tag '%s'",
 			net_ip2addr(&host->ip), host->tag);
 		director_remove_host(dir, NULL, NULL, host);
 		return;
-	} else {
-		str_append_c(str, '\t');
-		str_append_tabescaped(str, host->tag);
+	}
+	if (dir->ring_min_version >= DIRECTOR_VERSION_UPDOWN) {
+		str_printfa(str, "\t%c%ld", host->down ? 'D' : 'U',
+			    (long)host->last_updown_change);
 	}
 	str_append_c(str, '\n');
 	director_update_send(dir, src, str_c(str));
--- a/src/director/director.h	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/director.h	Mon May 18 07:49:15 2015 -0400
@@ -6,7 +6,7 @@
 
 #define DIRECTOR_VERSION_NAME "director"
 #define DIRECTOR_VERSION_MAJOR 1
-#define DIRECTOR_VERSION_MINOR 5
+#define DIRECTOR_VERSION_MINOR 6
 
 /* weak users supported in protocol */
 #define DIRECTOR_VERSION_WEAK_USERS 1
@@ -20,6 +20,8 @@
 #define DIRECTOR_VERSION_OPTIONS 5
 /* user tags supported */
 #define DIRECTOR_VERSION_TAGS 5
+/* up/down state is tracked */
+#define DIRECTOR_VERSION_UPDOWN 6
 
 /* Minimum time between even attempting to communicate with a director that
    failed due to a protocol error. */
--- a/src/director/doveadm-connection.c	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/doveadm-connection.c	Mon May 18 07:49:15 2015 -0400
@@ -51,6 +51,8 @@
 			    net_ip2addr(&(*hostp)->ip), (*hostp)->vhost_count,
 			    (*hostp)->user_count);
 		str_append_tabescaped(str, (*hostp)->tag);
+		str_printfa(str, "\t%c\t%ld", (*hostp)->down ? 'D' : 'U',
+			    (long)(*hostp)->last_updown_change);
 		str_append_c(str, '\n');
 	}
 	str_append_c(str, '\n');
@@ -285,6 +287,33 @@
 }
 
 static bool
+doveadm_cmd_host_updown(struct doveadm_connection *conn, bool down,
+			const char *line)
+{
+	struct mail_host *host;
+	struct ip_addr ip;
+
+	if (net_addr2ip(line, &ip) < 0) {
+		i_error("doveadm sent invalid %s parameters: %s",
+			down ? "HOST-DOWN" : "HOST-UP", line);
+		return FALSE;
+	}
+	host = mail_host_lookup(conn->dir->mail_hosts, &ip);
+	if (host == NULL) {
+		o_stream_nsend_str(conn->output, "NOTFOUND\n");
+		return TRUE;
+	}
+	if (host->down != down) {
+		mail_host_set_down(conn->dir->mail_hosts, host,
+				   down, ioloop_time);
+		director_update_host(conn->dir, conn->dir->self_host,
+				     NULL, host);
+	}
+	o_stream_nsend(conn->output, "OK\n", 3);
+	return TRUE;
+}
+
+static bool
 doveadm_cmd_host_remove(struct doveadm_connection *conn, const char *line)
 {
 	struct mail_host *host;
@@ -587,6 +616,10 @@
 			ret = doveadm_cmd_director_remove(conn, args);
 		else if (strcmp(cmd, "HOST-SET") == 0)
 			ret = doveadm_cmd_host_set(conn, args);
+		else if (strcmp(cmd, "HOST-UP") == 0)
+			ret = doveadm_cmd_host_updown(conn, FALSE, args);
+		else if (strcmp(cmd, "HOST-DOWN") == 0)
+			ret = doveadm_cmd_host_updown(conn, TRUE, args);
 		else if (strcmp(cmd, "HOST-REMOVE") == 0)
 			ret = doveadm_cmd_host_remove(conn, args);
 		else if (strcmp(cmd, "HOST-FLUSH") == 0)
--- a/src/director/mail-host.c	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/mail-host.c	Mon May 18 07:49:15 2015 -0400
@@ -58,6 +58,9 @@
 	char num_str[MAX_INT_STRLEN];
 	unsigned int i, j;
 
+	if (host->down)
+		return;
+
 	ip_str = net_ip2addr(&host->ip);
 
 	md5_init(&md5_ctx);
@@ -99,6 +102,8 @@
 	/* rebuild vhosts */
 	array_clear(&list->vhosts);
 	array_foreach(&list->hosts, hostp) {
+		if ((*hostp)->down)
+			continue;
 		for (i = 0; i < (*hostp)->vhost_count; i++) {
 			vhost = array_append_space(&list->vhosts);
 			vhost->host = *hostp;
@@ -269,6 +274,16 @@
 	host->tag = i_strdup(tag);
 }
 
+void mail_host_set_down(struct mail_host_list *list,
+			struct mail_host *host, bool down, time_t timestamp)
+{
+	if (host->down != down) {
+		host->down = down;
+		host->last_updown_change = timestamp;
+		list->hosts_unsorted = TRUE;
+	}
+}
+
 void mail_host_set_vhost_count(struct mail_host_list *list,
 			       struct mail_host *host, unsigned int vhost_count)
 {
--- a/src/director/mail-host.h	Mon May 18 13:56:07 2015 +0300
+++ b/src/director/mail-host.h	Mon May 18 07:49:15 2015 -0400
@@ -8,6 +8,10 @@
 struct mail_host {
 	unsigned int user_count;
 	unsigned int vhost_count;
+	/* server up/down. down=TRUE has effectively the same result as if
+	   vhost_count=0. */
+	bool down;
+	time_t last_updown_change;
 
 	struct ip_addr ip;
 	char *tag;
@@ -26,6 +30,8 @@
 int mail_hosts_parse_and_add(struct mail_host_list *list,
 			     const char *hosts_string);
 void mail_host_set_tag(struct mail_host *host, const char *tag);
+void mail_host_set_down(struct mail_host_list *list,
+			struct mail_host *host, bool down, time_t timestamp);
 void mail_host_set_vhost_count(struct mail_host_list *list,
 			       struct mail_host *host,
 			       unsigned int vhost_count);
--- a/src/doveadm/doveadm-director.c	Mon May 18 13:56:07 2015 +0300
+++ b/src/doveadm/doveadm-director.c	Mon May 18 07:49:15 2015 -0400
@@ -176,21 +176,32 @@
 	doveadm_print_init(DOVEADM_PRINT_TYPE_TABLE);
 	doveadm_print_header_simple("mail server ip");
 	doveadm_print_header_simple("tag");
-	doveadm_print_header("vhosts", "vhosts",
-			     DOVEADM_PRINT_HEADER_FLAG_RIGHT_JUSTIFY);
-	doveadm_print_header("users", "users",
-			     DOVEADM_PRINT_HEADER_FLAG_RIGHT_JUSTIFY);
+	doveadm_print_header_simple("vhosts");
+	doveadm_print_header_simple("state");
+	doveadm_print_header("state-changed", "state changed", 0);
+	doveadm_print_header_simple("users");
 
 	director_send(ctx, "HOST-LIST\n");
 	while ((line = i_stream_read_next_line(ctx->input)) != NULL) {
 		if (*line == '\0')
 			break;
 		T_BEGIN {
+			unsigned int arg_count;
+			time_t ts;
+
 			args = t_strsplit_tab(line);
-			if (str_array_length(args) >= 4) {
-				doveadm_print(args[0]);
+			arg_count = str_array_length(args);
+			if (arg_count >= 6) {
+				/* ip vhosts users tag updown updown-ts */
+				doveadm_print(args[0]); 
 				doveadm_print(args[3]);
 				doveadm_print(args[1]);
+				doveadm_print(args[4][0] == 'D' ? "down" : "up");
+				if (str_to_time(args[5], &ts) < 0 ||
+				    ts <= 0)
+					doveadm_print("-");
+				else
+					doveadm_print(unixdate2str(ts));
 				doveadm_print(args[2]);
 			}
 		} T_END;
@@ -447,22 +458,24 @@
 	director_disconnect(ctx);
 }
 
-static void cmd_director_remove(int argc, char *argv[])
+static void
+cmd_director_ipcmd(const char *cmd_name, doveadm_command_t *cmd,
+		   const char *success_result, int argc, char *argv[])
 {
 	struct director_context *ctx;
 	struct ip_addr *ips;
 	unsigned int i, ips_count;
 	const char *host, *line;
 
-	ctx = cmd_director_init(argc, argv, "a:", cmd_director_remove);
+	ctx = cmd_director_init(argc, argv, "a:", cmd);
 	host = argv[optind++];
 	if (host == NULL || argv[optind] != NULL)
-		director_cmd_help(cmd_director_remove);
+		director_cmd_help(cmd);
 
 	director_get_host(host, &ips, &ips_count);
 	for (i = 0; i < ips_count; i++) {
 		director_send(ctx, t_strdup_printf(
-			"HOST-REMOVE\t%s\n", net_ip2addr(&ips[i])));
+			"%s\t%s\n", cmd_name, net_ip2addr(&ips[i])));
 	}
 	for (i = 0; i < ips_count; i++) {
 		line = i_stream_read_next_line(ctx->input);
@@ -476,12 +489,30 @@
 				line == NULL ? "failed" : line);
 			doveadm_exit_code = EX_TEMPFAIL;
 		} else if (doveadm_verbose) {
-			printf("%s: removed\n", net_ip2addr(&ips[i]));
+			printf("%s: %s\n", net_ip2addr(&ips[i]), success_result);
 		}
 	}
 	director_disconnect(ctx);
 }
 
+static void cmd_director_remove(int argc, char *argv[])
+{
+	cmd_director_ipcmd("HOST-REMOVE", cmd_director_remove,
+			   "removed", argc, argv);
+}
+
+static void cmd_director_up(int argc, char *argv[])
+{
+	cmd_director_ipcmd("HOST-UP", cmd_director_up,
+			   "up", argc, argv);
+}
+
+static void cmd_director_down(int argc, char *argv[])
+{
+	cmd_director_ipcmd("HOST-DOWN", cmd_director_down,
+			   "down", argc, argv);
+}
+
 static void cmd_director_move(int argc, char *argv[])
 {
 	struct director_context *ctx;
@@ -777,6 +808,10 @@
 	  "[-a <director socket path>] [-f <users file>] [-h | -u] [<host>]" },
 	{ cmd_director_add, "director add",
 	  "[-a <director socket path>] [-t <tag>] <host> [<vhost count>]" },
+	{ cmd_director_up, "director up",
+	  "[-a <director socket path>] <host>" },
+	{ cmd_director_down, "director down",
+	  "[-a <director socket path>] <host>" },
 	{ cmd_director_remove, "director remove",
 	  "[-a <director socket path>] <host>" },
 	{ cmd_director_move, "director move",