# HG changeset patch # User Timo Sirainen # Date 1511651975 -7200 # Node ID 867c3905ac0b6a155f7ab4aa71a7b5a2833b24d7 # Parent d811474ef901b888a10b1d04746504221a606584 director: Avoid USER loops when ring latency is over 1 second Do this by adding a timestamp parameter to USER events. This way if it takes over 1 second for the USER event to traverse the ring, it won't get into an infinite loop getting the user updated over and over again. diff -r d811474ef901 -r 867c3905ac0b src/director/director-connection.c --- a/src/director/director-connection.c Sun Nov 26 01:14:01 2017 +0200 +++ b/src/director/director-connection.c Sun Nov 26 01:19:35 2017 +0200 @@ -682,12 +682,25 @@ user->host->user_count++; ret = TRUE; } - if (timestamp == ioloop_time && (time_t)user->timestamp != timestamp) { + /* Update user's timestamp if it's higher than the current one. Note + that we'll preserve the original timestamp. This is important when + the director ring is slow and a single USER can traverse through + the ring more than a second. We don't want to get into a loop where + the same USER goes through the ring forever. */ + if ((time_t)user->timestamp < timestamp) { + /* NOTE: This makes the users list somewhat out-of-order. + It's not a big problem - most likely it's only a few seconds + difference. The worst that can happen is that some users + take up memory that should have been freed already. */ + dir_debug("user refresh: %u refreshed timestamp from %u to %ld", + username_hash, user->timestamp, (long)timestamp); user_directory_refresh(users, user); + user->timestamp = timestamp; ret = TRUE; + } else { + dir_debug("user refresh: %u ignored timestamp %ld (we have %u)", + username_hash, (long)timestamp, user->timestamp); } - dir_debug("user refresh: %u refreshed timeout to %ld", - username_hash, (long)user->timestamp); if (unset_weak_user) { /* user is no longer weak. handle pending requests for @@ -765,10 +778,12 @@ struct mail_host *host; struct user *user; bool forced; + time_t timestamp = ioloop_time; - if (str_array_length(args) != 2 || + if (str_array_length(args) < 2 || str_to_uint(args[0], &username_hash) < 0 || - net_addr2ip(args[1], &ip) < 0) { + net_addr2ip(args[1], &ip) < 0 || + (args[2] != NULL && str_to_time(args[2], ×tamp) < 0)) { director_cmd_error(conn, "Invalid parameters"); return FALSE; } @@ -780,7 +795,7 @@ } if (director_user_refresh(conn, username_hash, - host, ioloop_time, FALSE, &forced, &user)) { + host, timestamp, FALSE, &forced, &user)) { struct director_host *src_host = forced ? conn->dir->self_host : conn->host; i_assert(!user->weak); diff -r d811474ef901 -r 867c3905ac0b src/director/director.c --- a/src/director/director.c Sun Nov 26 01:14:01 2017 +0200 +++ b/src/director/director.c Sun Nov 26 01:19:35 2017 +0200 @@ -730,11 +730,24 @@ void director_update_user(struct director *dir, struct director_host *src, struct user *user) { + struct director_connection *const *connp; + i_assert(src != NULL); - i_assert(!user->weak); - director_update_send(dir, src, t_strdup_printf("USER\t%u\t%s\n", - user->username_hash, user->host->ip_str)); + + array_foreach(&dir->connections, connp) { + if (director_connection_get_host(*connp) == src) + continue; + + if (director_connection_get_minor_version(*connp) >= DIRECTOR_VERSION_USER_TIMESTAMP) { + director_connection_send(*connp, t_strdup_printf( + "USER\t%u\t%s\t%u\n", user->username_hash, user->host->ip_str, + user->timestamp)); + } else { + director_connection_send(*connp, t_strdup_printf( + "USER\t%u\t%s\n", user->username_hash, user->host->ip_str)); + } + } } void director_update_user_weak(struct director *dir, struct director_host *src, diff -r d811474ef901 -r 867c3905ac0b src/director/director.h --- a/src/director/director.h Sun Nov 26 01:14:01 2017 +0200 +++ b/src/director/director.h Sun Nov 26 01:19:35 2017 +0200 @@ -28,6 +28,8 @@ #define DIRECTOR_VERSION_USER_KICK_ALT 8 /* Users are sent as "U" command in handshake */ #define DIRECTOR_VERSION_HANDSHAKE_U_CMD 9 +/* USER event with timestamp supported */ +#define DIRECTOR_VERSION_USER_TIMESTAMP 9 /* Minimum time between even attempting to communicate with a director that failed due to a protocol error. */ diff -r d811474ef901 -r 867c3905ac0b src/director/user-directory.h --- a/src/director/user-directory.h Sun Nov 26 01:14:01 2017 +0200 +++ b/src/director/user-directory.h Sun Nov 26 01:19:35 2017 +0200 @@ -5,7 +5,9 @@ ((user)->kill_ctx != NULL) struct user { - /* sorted by time */ + /* Approximately sorted by time (except during handshaking). + The sorting order may be constantly wrong a few seconds here and + there. */ struct user *prev, *next; /* first 32 bits of MD5(username). collisions are quite unlikely, but