comparison src/director/director-connection.c @ 22252:8882a5cbe76d

director: Use more accurate timestamps for handshake timeout logging There could be some time between the create, connect and when ME was received.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Wed, 31 May 2017 13:17:37 +0300
parents dce18ed41532
children 9676f01a11df
comparison
equal deleted inserted replaced
22251:dce18ed41532 22252:8882a5cbe76d
96 #define DIRECTOR_OPT_CONSISTENT_HASHING "consistent-hashing" 96 #define DIRECTOR_OPT_CONSISTENT_HASHING "consistent-hashing"
97 97
98 struct director_connection { 98 struct director_connection {
99 struct director *dir; 99 struct director *dir;
100 char *name; 100 char *name;
101 time_t created; 101 struct timeval created, connected_time, me_received_time;
102 unsigned int minor_version; 102 unsigned int minor_version;
103 103
104 struct timeval last_input, last_output; 104 struct timeval last_input, last_output;
105 105
106 /* for incoming connections the director host isn't known until 106 /* for incoming connections the director host isn't known until
159 } 159 }
160 160
161 static void 161 static void
162 director_connection_init_timeout(struct director_connection *conn) 162 director_connection_init_timeout(struct director_connection *conn)
163 { 163 {
164 unsigned int secs = ioloop_time - conn->created; 164 unsigned int secs;
165 165
166 if (!conn->connected) { 166 if (!conn->connected) {
167 secs = ioloop_time - conn->created.tv_sec;
167 i_error("director(%s): Connect timed out (%u secs)", 168 i_error("director(%s): Connect timed out (%u secs)",
168 conn->name, secs); 169 conn->name, secs);
169 } else if (!conn->me_received) { 170 } else if (!conn->me_received) {
171 secs = ioloop_time - conn->connected_time.tv_sec;
170 i_error("director(%s): Handshaking ME timed out (%u secs)", 172 i_error("director(%s): Handshaking ME timed out (%u secs)",
171 conn->name, secs); 173 conn->name, secs);
172 } else if (!conn->in) { 174 } else if (!conn->in) {
175 secs = ioloop_time - conn->me_received_time.tv_sec;
173 i_error("director(%s): Sending handshake timed out (%u secs)", 176 i_error("director(%s): Sending handshake timed out (%u secs)",
174 conn->name, secs); 177 conn->name, secs);
175 } else { 178 } else {
179 secs = ioloop_time - conn->me_received_time.tv_sec;
176 i_error("director(%s): Handshaking DONE timed out (%u secs)", 180 i_error("director(%s): Handshaking DONE timed out (%u secs)",
177 conn->name, secs); 181 conn->name, secs);
178 } 182 }
179 director_connection_disconnected(&conn, "Handshake timeout"); 183 director_connection_disconnected(&conn, "Handshake timeout");
180 } 184 }
410 net_ip2addr(&conn->host->ip), conn->host->port, 414 net_ip2addr(&conn->host->ip), conn->host->port,
411 net_ip2addr(&ip), port); 415 net_ip2addr(&ip), port);
412 return FALSE; 416 return FALSE;
413 } 417 }
414 conn->me_received = TRUE; 418 conn->me_received = TRUE;
419 conn->me_received_time = ioloop_timeval;
415 420
416 if (args[2] != NULL) { 421 if (args[2] != NULL) {
417 time_t remote_time; 422 time_t remote_time;
418 int diff; 423 int diff;
419 424
1237 } 1242 }
1238 1243
1239 static bool director_handshake_cmd_done(struct director_connection *conn) 1244 static bool director_handshake_cmd_done(struct director_connection *conn)
1240 { 1245 {
1241 struct director *dir = conn->dir; 1246 struct director *dir = conn->dir;
1242 unsigned int handshake_secs = time(NULL) - conn->created; 1247 int handshake_msecs = timeval_diff_msecs(&ioloop_timeval, &conn->connected_time);
1243 string_t *str; 1248 string_t *str;
1244 1249
1245 if (conn->users_unsorted && conn->user_iter == NULL) { 1250 if (conn->users_unsorted && conn->user_iter == NULL) {
1246 /* we sent our user list before receiving remote's */ 1251 /* we sent our user list before receiving remote's */
1247 conn->users_unsorted = FALSE; 1252 conn->users_unsorted = FALSE;
1248 mail_hosts_sort_users(conn->dir->mail_hosts); 1253 mail_hosts_sort_users(conn->dir->mail_hosts);
1249 } 1254 }
1250 1255
1251 str = t_str_new(128); 1256 str = t_str_new(128);
1252 str_printfa(str, "director(%s): Handshake finished in %u secs " 1257 str_printfa(str, "director(%s): Handshake finished in %u.%03u secs "
1253 "(bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T")", 1258 "(bytes in=%"PRIuUOFF_T" out=%"PRIuUOFF_T")",
1254 conn->name, handshake_secs, conn->input->v_offset, 1259 conn->name, handshake_msecs/1000, handshake_msecs%1000,
1255 conn->output->offset); 1260 conn->input->v_offset, conn->output->offset);
1256 if (handshake_secs >= DIRECTOR_HANDSHAKE_WARN_SECS) 1261 if (handshake_msecs >= DIRECTOR_HANDSHAKE_WARN_SECS*1000)
1257 i_warning("%s", str_c(str)); 1262 i_warning("%s", str_c(str));
1258 else 1263 else
1259 i_info("%s", str_c(str)); 1264 i_info("%s", str_c(str));
1260 1265
1261 /* the host is up now, make sure we can connect to it immediately 1266 /* the host is up now, make sure we can connect to it immediately
1720 1725
1721 static void 1726 static void
1722 director_connection_log_disconnect(struct director_connection *conn, int err, 1727 director_connection_log_disconnect(struct director_connection *conn, int err,
1723 const char *errstr) 1728 const char *errstr)
1724 { 1729 {
1725 unsigned int secs = ioloop_time - conn->created; 1730 unsigned int secs = ioloop_time - conn->connected_time.tv_sec;
1726 string_t *str = t_str_new(128); 1731 string_t *str = t_str_new(128);
1727 1732
1728 i_assert(conn->connected); 1733 i_assert(conn->connected);
1729 1734
1730 if (conn->connect_request_to != NULL) { 1735 if (conn->connect_request_to != NULL) {
1948 director_connection_init_common(struct director *dir, int fd) 1953 director_connection_init_common(struct director *dir, int fd)
1949 { 1954 {
1950 struct director_connection *conn; 1955 struct director_connection *conn;
1951 1956
1952 conn = i_new(struct director_connection, 1); 1957 conn = i_new(struct director_connection, 1);
1953 conn->created = ioloop_time; 1958 conn->created = ioloop_timeval;
1954 conn->fd = fd; 1959 conn->fd = fd;
1955 conn->dir = dir; 1960 conn->dir = dir;
1956 conn->input = i_stream_create_fd(conn->fd, MAX_INBUF_SIZE, FALSE); 1961 conn->input = i_stream_create_fd(conn->fd, MAX_INBUF_SIZE, FALSE);
1957 conn->output = o_stream_create_fd(conn->fd, MAX_OUTBUF_SIZE, FALSE); 1962 conn->output = o_stream_create_fd(conn->fd, MAX_OUTBUF_SIZE, FALSE);
1958 o_stream_set_no_error_handling(conn->output, TRUE); 1963 o_stream_set_no_error_handling(conn->output, TRUE);
1977 struct director_connection *conn; 1982 struct director_connection *conn;
1978 1983
1979 conn = director_connection_init_common(dir, fd); 1984 conn = director_connection_init_common(dir, fd);
1980 conn->in = TRUE; 1985 conn->in = TRUE;
1981 conn->connected = TRUE; 1986 conn->connected = TRUE;
1987 conn->connected_time = ioloop_timeval;
1982 conn->name = i_strdup_printf("%s/in", net_ip2addr(ip)); 1988 conn->name = i_strdup_printf("%s/in", net_ip2addr(ip));
1983 conn->io = io_add(conn->fd, IO_READ, director_connection_input, conn); 1989 conn->io = io_add(conn->fd, IO_READ, director_connection_input, conn);
1984 conn->to_ping = timeout_add(DIRECTOR_CONNECTION_ME_TIMEOUT_MSECS, 1990 conn->to_ping = timeout_add(DIRECTOR_CONNECTION_ME_TIMEOUT_MSECS,
1985 director_connection_init_timeout, conn); 1991 director_connection_init_timeout, conn);
1986 1992
1996 i_error("director(%s): connect() failed: %s", conn->name, 2002 i_error("director(%s): connect() failed: %s", conn->name,
1997 strerror(err)); 2003 strerror(err));
1998 director_connection_disconnected(&conn, strerror(err)); 2004 director_connection_disconnected(&conn, strerror(err));
1999 return; 2005 return;
2000 } 2006 }
2007 conn->connected_time = ioloop_timeval;
2001 conn->connected = TRUE; 2008 conn->connected = TRUE;
2002 o_stream_set_flush_callback(conn->output, 2009 o_stream_set_flush_callback(conn->output,
2003 director_connection_output, conn); 2010 director_connection_output, conn);
2004 2011
2005 io_remove(&conn->io); 2012 io_remove(&conn->io);
2128 const char *reason) 2135 const char *reason)
2129 { 2136 {
2130 struct director_connection *conn = *_conn; 2137 struct director_connection *conn = *_conn;
2131 struct director *dir = conn->dir; 2138 struct director *dir = conn->dir;
2132 2139
2133 if (conn->created + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time && 2140 if (conn->connected_time.tv_sec + DIRECTOR_SUCCESS_MIN_CONNECT_SECS > ioloop_time &&
2134 conn->host != NULL) { 2141 conn->host != NULL) {
2135 /* connection didn't exist for very long, assume it has a 2142 /* connection didn't exist for very long, assume it has a
2136 network problem */ 2143 network problem */
2137 conn->host->last_network_failure = ioloop_time; 2144 conn->host->last_network_failure = ioloop_time;
2138 } 2145 }