Mercurial > dovecot > core-2.2
changeset 14432:366b9e5fc85c
director: Don't communicate with directors that recently sent invalid input.
Track network and protocol failures separately. If a director had sent
invalid protocol data within last 60 seconds, don't try to connect to it and
don't allow it to connect to us.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 09 Apr 2012 08:17:52 +0300 |
parents | 084064444f89 |
children | 19e09ab09383 |
files | src/director/director-connection.c src/director/director-host.h src/director/director.c src/director/director.h src/director/doveadm-connection.c |
diffstat | 5 files changed, 61 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/src/director/director-connection.c Mon Apr 09 07:52:25 2012 +0300 +++ b/src/director/director-connection.c Mon Apr 09 08:17:52 2012 +0300 @@ -65,7 +65,7 @@ #define DIRECTOR_CONNECTION_PING_SYNC_INTERVAL_MSECS 1000 /* If outgoing director connection exists for less than this many seconds, mark the host as failed so we won't try to reconnect to it immediately */ -#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 10 +#define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 40 #if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS # error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low @@ -113,6 +113,7 @@ }; static void director_connection_disconnected(struct director_connection **conn); +static void director_connection_protocol_error(struct director_connection **conn); static void ATTR_FORMAT(2, 3) director_cmd_error(struct director_connection *conn, const char *fmt, ...) @@ -318,6 +319,7 @@ const char *connect_str; struct ip_addr ip; unsigned int port; + time_t next_comm_attempt; if (!director_args_parse_ip_port(conn, args, &ip, &port)) return FALSE; @@ -362,10 +364,19 @@ /* make sure we don't keep old sequence values across restarts */ conn->host->last_seq = 0; - if (dir->left == NULL) { + next_comm_attempt = conn->host->last_protocol_failure + + DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS; + if (next_comm_attempt > ioloop_time) { + /* the director recently sent invalid protocol data, + don't try retrying yet */ + i_error("director(%s): Remote sent invalid protocol data recently, " + "waiting %u secs before allowing further communication", + conn->name, (unsigned int)(next_comm_attempt-ioloop_time)); + return FALSE; + } else if (dir->left == NULL) { /* a) - just in case the left is also our right side reset its failed state, so we can connect to it */ - conn->host->last_failed = 0; + conn->host->last_network_failure = 0; if (!director_has_outgoing_connections(dir)) director_connect(dir); } else if (dir->left->host == conn->host) { @@ -548,9 +559,9 @@ host = director_host_lookup(conn->dir, &ip, port); if (host != NULL) { - /* already have this. just reset its last_failed timestamp, - since it might be up now. */ - host->last_failed = 0; + /* already have this. just reset its last_network_failure + timestamp, since it might be up now. */ + host->last_network_failure = 0; return TRUE; } @@ -853,7 +864,7 @@ /* the host is up now, make sure we can connect to it immediately if needed */ - conn->host->last_failed = 0; + conn->host->last_network_failure = 0; conn->handshake_received = TRUE; if (conn->in) { @@ -1024,7 +1035,7 @@ host = director_host_get(conn->dir, &ip, port); /* reset failure timestamp so we'll actually try to connect there. */ - host->last_failed = 0; + host->last_network_failure = 0; /* remote suggests us to connect elsewhere */ if (dir->right != NULL && @@ -1193,7 +1204,7 @@ /* buffer full */ i_error("BUG: Director %s sent us more than %d bytes", conn->name, MAX_INBUF_SIZE); - director_connection_disconnected(&conn); + director_connection_protocol_error(&conn); return; } @@ -1208,7 +1219,7 @@ i_debug("director(%s): Invalid input, disconnecting", conn->name); } - director_connection_disconnected(&conn); + director_connection_protocol_error(&conn); break; } } @@ -1394,13 +1405,6 @@ if (dir->debug && conn->host != NULL) i_debug("Disconnecting from %s", conn->host->name); - if (conn->host != NULL && !conn->wrong_host && - (!conn->handshake_received || - conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time)) { - /* avoid reconnecting back here immediately */ - conn->host->last_failed = ioloop_time; - } - conns = array_get(&dir->connections, &count); for (i = 0; i < count; i++) { if (conns[i] == conn) { @@ -1449,6 +1453,24 @@ struct director_connection *conn = *_conn; struct director *dir = conn->dir; + if (conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time) { + /* connection didn't exist for very long, assume it has a + network problem */ + conn->host->last_network_failure = ioloop_time; + } + + director_connection_deinit(_conn); + if (dir->right == NULL) + director_connect(dir); +} + +void director_connection_protocol_error(struct director_connection **_conn) +{ + struct director_connection *conn = *_conn; + struct director *dir = conn->dir; + + conn->host->last_protocol_failure = ioloop_time; + director_connection_deinit(_conn); if (dir->right == NULL) director_connect(dir);
--- a/src/director/director-host.h Mon Apr 09 07:52:25 2012 +0300 +++ b/src/director/director-host.h Mon Apr 09 08:17:52 2012 +0300 @@ -17,8 +17,9 @@ it can be ignored (or: it must be ignored to avoid potential command loops) */ unsigned int last_seq; - /* Last time host was detected to be down/broken */ - time_t last_failed; + /* Last time host was detected to be down */ + time_t last_network_failure; + time_t last_protocol_failure; /* we are this director */ unsigned int self:1; };
--- a/src/director/director.c Mon Apr 09 07:52:25 2012 +0300 +++ b/src/director/director.c Mon Apr 09 08:17:52 2012 +0300 @@ -110,13 +110,13 @@ port = dir->test_port != 0 ? dir->test_port : host->port; fd = net_connect_ip(&host->ip, port, &dir->self_ip); if (fd == -1) { - host->last_failed = ioloop_time; + host->last_network_failure = ioloop_time; i_error("connect(%s) failed: %m", host->name); return -1; } /* Reset timestamp so that director_connect() won't skip this host while we're still trying to connect to it */ - host->last_failed = 0; + host->last_network_failure = 0; director_connection_init_out(dir, fd, host); return 0; @@ -151,9 +151,15 @@ for (i = 1; i < count; i++) { unsigned int idx = (self_idx + i) % count; - if (hosts[idx]->last_failed + + if (hosts[idx]->last_network_failure + DIRECTOR_RECONNECT_RETRY_SECS > ioloop_time) { - /* failed recently, don't try retrying here */ + /* connection failed recently, don't try retrying here */ + continue; + } + if (hosts[idx]->last_protocol_failure + + DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS > ioloop_time) { + /* the director recently sent invalid protocol data, + don't try retrying yet */ continue; }
--- a/src/director/director.h Mon Apr 09 07:52:25 2012 +0300 +++ b/src/director/director.h Mon Apr 09 08:17:52 2012 +0300 @@ -11,6 +11,10 @@ /* weak users supported in protocol v1.1+ */ #define DIRECTOR_VERSION_WEAK_USERS 1 +/* Minimum time between even attempting to communicate with a director that + failed due to a protocol error. */ +#define DIRECTOR_PROTOCOL_FAILURE_RETRY_SECS 60 + struct director; struct mail_host; struct user;
--- a/src/director/doveadm-connection.c Mon Apr 09 07:52:25 2012 +0300 +++ b/src/director/doveadm-connection.c Mon Apr 09 08:17:52 2012 +0300 @@ -99,6 +99,7 @@ string_t *str = t_str_new(1024); const char *type; bool left, right; + time_t last_failed; array_foreach(&dir->dir_hosts, hostp) { const struct director_host *host = *hostp; @@ -116,9 +117,12 @@ type = "right"; else type = ""; + + last_failed = I_MAX(host->last_network_failure, + host->last_protocol_failure); str_printfa(str, "%s\t%u\t%s\t%lu\n", net_ip2addr(&host->ip), host->port, type, - (unsigned long)host->last_failed); + (unsigned long)last_failed); } str_append_c(str, '\n'); o_stream_send(conn->output, str_data(str), str_len(str));