Mercurial > dovecot > core-2.2
changeset 15132:27d3289e1f5c
director: Improved logging related to disconnections.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 12 Sep 2012 00:20:47 +0300 |
parents | 24e791bbcf69 |
children | 8cec3b4c43ca |
files | src/director/director-connection.c src/director/director-connection.h src/director/director.c src/director/director.h |
diffstat | 4 files changed, 109 insertions(+), 42 deletions(-) [+] |
line wrap: on
line diff
--- a/src/director/director-connection.c Wed Sep 12 00:04:34 2012 +0300 +++ b/src/director/director-connection.c Wed Sep 12 00:20:47 2012 +0300 @@ -71,6 +71,8 @@ mark the host as failed so we won't try to reconnect to it immediately */ #define DIRECTOR_SUCCESS_MIN_CONNECT_SECS 40 #define DIRECTOR_WAIT_DISCONNECT_SECS 10 +#define DIRECTOR_HANDSHAKE_WARN_SECS 29 +#define DIRECTOR_HANDSHAKE_BYTES_LOG_MIN_SECS (60*30) #if DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS <= DIRECTOR_CONNECTION_PING_TIMEOUT_MSECS # error DIRECTOR_CONNECTION_DONE_TIMEOUT_MSECS is too low @@ -92,6 +94,8 @@ /* for incoming connections the director host isn't known until ME-line is received */ struct director_host *host; + /* this is set only for wrong connections: */ + struct director_host *connect_request_to; int fd; struct io *io; @@ -118,7 +122,9 @@ }; static void director_connection_disconnected(struct director_connection **conn); -static void director_connection_reconnect(struct director_connection **conn); +static void director_connection_reconnect(struct director_connection **conn, + const char *reason); +static void director_connection_log_disconnect(struct director_connection *conn); static void ATTR_FORMAT(2, 3) director_cmd_error(struct director_connection *conn, const char *fmt, ...) @@ -169,7 +175,9 @@ static void director_connection_wait_timeout(struct director_connection *conn) { - director_connection_deinit(&conn); + director_connection_log_disconnect(conn); + director_connection_deinit(&conn, + "Timeout waiting for disconnect after CONNECT"); } static void director_connection_send_connect(struct director_connection *conn, @@ -186,6 +194,12 @@ (void)o_stream_flush(conn->output); o_stream_uncork(conn->output); + /* wait for a while for the remote to disconnect, so it will hopefully + see our CONNECT command. we'll also log the warning later to avoid + multiple log lines about it. */ + conn->connect_request_to = host; + director_host_ref(conn->connect_request_to); + conn->to_disconnect = timeout_add(DIRECTOR_WAIT_DISCONNECT_SECS*1000, director_connection_wait_timeout, conn); @@ -220,10 +234,10 @@ } else if (dir->left == NULL) { /* no conflicts yet */ } else if (dir->left->host == conn->host) { - i_info("Dropping existing connection %s " - "in favor of its new connection %s", - dir->left->host->name, conn->host->name); - director_connection_deinit(&dir->left); + i_warning("Replacing left director connection %s with %s", + dir->left->host->name, conn->host->name); + director_connection_deinit(&dir->left, t_strdup_printf( + "Replacing with %s", conn->host->name)); } else if (dir->left->verifying_left) { /* we're waiting to verify if our current left is still working. if we don't receive a PONG, the current left @@ -235,9 +249,6 @@ dir->self_host) < 0) { /* the old connection is the correct one. refer the client there (FIXME: do we ever get here?) */ - i_warning("Director connection %s tried to connect to " - "us, should use %s instead", - conn->name, dir->left->host->name); director_connection_send_connect(conn, dir->left->host); return TRUE; } else { @@ -267,7 +278,8 @@ /* either use this or disconnect it */ if (!director_connection_assign_left(conn)) { /* we don't want this */ - director_connection_deinit(&conn); + director_connection_deinit(&conn, + "Unwanted incoming connection"); director_assign_left(dir); break; } @@ -304,9 +316,10 @@ conn->wrong_host = TRUE; return FALSE; } - i_info("Replacing right director connection %s with %s", - dir->right->host->name, conn->host->name); - director_connection_deinit(&dir->right); + i_warning("Replacing right director connection %s with %s", + dir->right->host->name, conn->host->name); + director_connection_deinit(&dir->right, t_strdup_printf( + "Replacing with %s", conn->host->name)); } dir->right = conn; i_free(conn->name); @@ -410,7 +423,8 @@ } else if (dir->left->host == conn->host) { /* b) */ i_assert(dir->left != conn); - director_connection_deinit(&dir->left); + director_connection_deinit(&dir->left, + "Replacing with new incoming connection"); } else if (director_host_cmp_to_self(conn->host, dir->left->host, dir->self_host) < 0) { /* c) */ @@ -919,14 +933,19 @@ static bool director_handshake_cmd_done(struct director_connection *conn) { struct director *dir = conn->dir; - - if (dir->debug) { - unsigned int secs = time(NULL)-conn->created; + unsigned int handshake_secs = time(NULL) - conn->created; + string_t *str; - i_debug("director(%s): Handshake took %u secs, " - "bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T, - conn->name, secs, conn->input->v_offset, - conn->output->offset); + if (handshake_secs >= DIRECTOR_HANDSHAKE_WARN_SECS || dir->debug) { + str = t_str_new(128); + str_printfa(str, "director(%s): Handshake took %u secs, " + "bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T, + conn->name, handshake_secs, conn->input->v_offset, + conn->output->offset); + if (handshake_secs >= DIRECTOR_HANDSHAKE_WARN_SECS) + i_warning("%s", str_c(str)); + else + i_debug("%s", str_c(str)); } /* the host is up now, make sure we can connect to it immediately @@ -1143,12 +1162,8 @@ if (conn->in && conn != dir->left && conn->me_received && conn->to_disconnect == NULL && director_host_cmp_to_self(dir->left->host, conn->host, - dir->self_host) < 0) { - i_warning("Director connection %s tried to connect to " - "us, should use %s instead", - conn->name, dir->left->host->name); + dir->self_host) < 0) director_connection_send_connect(conn, dir->left->host); - } } } @@ -1220,6 +1235,11 @@ return director_connection_sync(conn, args); if (strcmp(cmd, "CONNECT") == 0) return director_cmd_connect(conn, args); + if (strcmp(cmd, "QUIT") == 0) { + i_warning("Director %s disconnected us with reason: %s", + conn->name, t_strarray_join(args, " ")); + return FALSE; + } director_cmd_error(conn, "Unknown command %s", cmd); return FALSE; @@ -1247,6 +1267,37 @@ return ret; } +static void +director_connection_log_disconnect(struct director_connection *conn) +{ + unsigned int secs = ioloop_time - conn->created; + string_t *str = t_str_new(128); + + i_assert(conn->connected); + + if (conn->connect_request_to != NULL) { + i_warning("Director %s tried to connect to us, " + "should use %s instead", + conn->name, conn->connect_request_to->name); + return; + } + + str_printfa(str, "Director %s disconnected: ", conn->name); + str_append(str, "Connection closed"); + if (errno != 0 && errno != EPIPE) + str_printfa(str, ": %m"); + + str_printfa(str, " (connected %u secs, " + "in=%"PRIuUOFF_T" out=%"PRIuUOFF_T, + secs, conn->input->v_offset, conn->output->offset); + + if (!conn->me_received) + str_append(str, ", handshake ME not received"); + else if (!conn->handshake_received) + str_append(str, ", handshake DONE not received"); + i_error("%s", str_c(str)); +} + static void director_connection_input(struct director_connection *conn) { struct director *dir = conn->dir; @@ -1258,16 +1309,14 @@ return; case -1: /* disconnected */ - i_error("Director %s disconnected%s", conn->name, - conn->handshake_received ? "" : - " before handshake finished"); + director_connection_log_disconnect(conn); director_connection_disconnected(&conn); return; case -2: /* buffer full */ director_cmd_error(conn, "Director sent us more than %d bytes", MAX_INBUF_SIZE); - director_connection_reconnect(&conn); + director_connection_reconnect(&conn, "Too long input line"); return; } @@ -1289,7 +1338,8 @@ } T_END; if (!ret) { - director_connection_reconnect(&conn); + director_connection_reconnect(&conn, t_strdup_printf( + "Invalid input: %s", line)); break; } } @@ -1474,7 +1524,8 @@ return conn; } -void director_connection_deinit(struct director_connection **_conn) +void director_connection_deinit(struct director_connection **_conn, + const char *remote_reason) { struct director_connection *const *conns, *conn = *_conn; struct director *dir = conn->dir; @@ -1482,8 +1533,15 @@ *_conn = NULL; - if (dir->debug && conn->host != NULL) - i_debug("Disconnecting from %s", conn->host->name); + if (dir->debug && conn->host != NULL) { + i_debug("Disconnecting from %s: %s", + conn->host->name, remote_reason); + } + if (*remote_reason != '\0' && + conn->minor_version >= DIRECTOR_VERSION_QUIT) { + o_stream_send_str(conn->output, t_strdup_printf( + "QUIT\t%s\n", remote_reason)); + } conns = array_get(&dir->connections, &count); for (i = 0; i < count; i++) { @@ -1504,6 +1562,8 @@ if (conn->host != NULL) director_host_unref(conn->host); + if (conn->connect_request_to != NULL) + director_host_unref(conn->connect_request_to); if (conn->user_iter != NULL) user_directory_iter_deinit(&conn->user_iter); if (conn->to_disconnect != NULL) @@ -1542,17 +1602,18 @@ conn->host->last_network_failure = ioloop_time; } - director_connection_deinit(_conn); + director_connection_deinit(_conn, ""); if (dir->right == NULL) director_connect(dir); } -void director_connection_reconnect(struct director_connection **_conn) +static void director_connection_reconnect(struct director_connection **_conn, + const char *reason) { struct director_connection *conn = *_conn; struct director *dir = conn->dir; - director_connection_deinit(_conn); + director_connection_deinit(_conn, reason); if (dir->right == NULL) director_connect(dir); }
--- a/src/director/director-connection.h Wed Sep 12 00:04:34 2012 +0300 +++ b/src/director/director-connection.h Wed Sep 12 00:20:47 2012 +0300 @@ -10,7 +10,8 @@ struct director_connection * director_connection_init_out(struct director *dir, int fd, struct director_host *host); -void director_connection_deinit(struct director_connection **conn); +void director_connection_deinit(struct director_connection **conn, + const char *remote_reason); void director_connection_send(struct director_connection *conn, const char *data);
--- a/src/director/director.c Wed Sep 12 00:04:34 2012 +0300 +++ b/src/director/director.c Wed Sep 12 00:20:47 2012 +0300 @@ -216,7 +216,10 @@ if (dir->left != NULL) { /* since we couldn't connect to it, it must have failed recently */ - director_connection_deinit(&dir->left); + i_warning("director: Assuming %s is dead, disconnecting", + director_connection_get_name(dir->left)); + director_connection_deinit(&dir->left, + "This connection is dead?"); } dir->ring_min_version = DIRECTOR_VERSION_MINOR; if (!dir->ring_handshaked) @@ -482,7 +485,7 @@ if (director_connection_get_host(conn) != removed_host) i++; else { - director_connection_deinit(&conn); + director_connection_deinit(&conn, "Removing from ring"); conns = array_get(&dir->connections, &count); } } @@ -845,7 +848,7 @@ while (array_count(&dir->connections) > 0) { connp = array_idx(&dir->connections, 0); conn = *connp; - director_connection_deinit(&conn); + director_connection_deinit(&conn, "Shutting down"); } user_directory_deinit(&dir->users);
--- a/src/director/director.h Wed Sep 12 00:04:34 2012 +0300 +++ b/src/director/director.h Wed Sep 12 00:20:47 2012 +0300 @@ -6,12 +6,14 @@ #define DIRECTOR_VERSION_NAME "director" #define DIRECTOR_VERSION_MAJOR 1 -#define DIRECTOR_VERSION_MINOR 2 +#define DIRECTOR_VERSION_MINOR 3 /* weak users supported in protocol v1.1+ */ #define DIRECTOR_VERSION_WEAK_USERS 1 /* director removes supported in v1.2+ */ #define DIRECTOR_VERSION_RING_REMOVE 2 +/* quit reason supported in v1.3+ */ +#define DIRECTOR_VERSION_QUIT 3 /* Minimum time between even attempting to communicate with a director that failed due to a protocol error. */