changeset 19020:42d4da9ee7a9

dsync: Fixed again waiting for remote process wait to die. We can't rely on stderr getting closed. It doesn't happen if the remote process crashes. Now waiting for SIGCHLD in ioloop should solve this and still log all the error messages at exit.
author Timo Sirainen <tss@iki.fi>
date Thu, 27 Aug 2015 12:33:47 +0200
parents ec6e672a6e32
children 3fc9658c9712
files src/doveadm/doveadm-dsync.c
diffstat 1 files changed, 30 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/src/doveadm/doveadm-dsync.c	Thu Aug 27 10:39:26 2015 +0200
+++ b/src/doveadm/doveadm-dsync.c	Thu Aug 27 12:33:47 2015 +0200
@@ -5,6 +5,7 @@
 #include "array.h"
 #include "execv-const.h"
 #include "fd-set-nonblock.h"
+#include "child-wait.h"
 #include "istream.h"
 #include "ostream.h"
 #include "iostream-ssl.h"
@@ -72,6 +73,8 @@
 	const char *local_location;
 	pid_t remote_pid;
 	const char *const *remote_cmd_args;
+	struct child_wait *child_wait;
+	int exit_status;
 
 	int fd_in, fd_out, fd_err;
 	struct io *io_err;
@@ -99,6 +102,7 @@
 	unsigned int no_mail_sync:1;
 	unsigned int local_location_from_arg:1;
 	unsigned int replicator_notify:1;
+	unsigned int exited:1;
 };
 
 static bool legacy_dsync = FALSE;
@@ -118,10 +122,6 @@
 	case -1:
 		if (ctx->io_err != NULL)
 			io_remove(&ctx->io_err);
-		if (ctx->fd_in == -1) {
-			/* we're shutting down. */
-			io_loop_stop(current_ioloop);
-		}
 		break;
 	default:
 		while ((line = i_stream_next_line(ctx->err_stream)) != NULL)
@@ -407,35 +407,33 @@
 	return doveadm_is_killed() ? -1 : 0;
 }
 
-static void cmd_dsync_wait_remote(struct dsync_cmd_context *ctx,
-				  int *status_r)
+static void cmd_dsync_remote_exited(const struct child_wait_status *status,
+				    struct dsync_cmd_context *ctx)
+{
+	ctx->exited = TRUE;
+	ctx->exit_status = status->status;
+	io_loop_stop(current_ioloop);
+}
+
+static void cmd_dsync_wait_remote(struct dsync_cmd_context *ctx)
 {
 	struct timeout *to;
 
-	/* wait for stderr to close. this indicates that the remote process
-	   has died. while we're running we're also reading and printing all
-	   errors that still coming from it. */
+	/* wait in ioloop for the remote process to die. while we're running
+	   we're also reading and printing all errors that still coming from
+	   it. */
 	to = timeout_add(DSYNC_REMOTE_CMD_EXIT_WAIT_SECS*1000,
 			 io_loop_stop, current_ioloop);
 	io_loop_run(current_ioloop);
 	timeout_remove(&to);
 
-	/* unless we timed out, the process should be dead now or very soon. */
-	alarm(1);
-	if (waitpid(ctx->remote_pid, status_r, 0) == -1) {
-		if (errno != EINTR) {
-			i_error("waitpid(%ld) failed: %m",
+	if (!ctx->exited) {
+		i_error("Remote command process isn't dying, killing it");
+		if (kill(ctx->remote_pid, SIGKILL) < 0 && errno != ESRCH) {
+			i_error("kill(%ld, SIGKILL) failed: %m",
 				(long)ctx->remote_pid);
-		} else {
-			i_error("Remote command process isn't dying, killing it");
-			if (kill(ctx->remote_pid, SIGKILL) < 0 && errno != ESRCH) {
-				i_error("kill(%ld, SIGKILL) failed: %m",
-					(long)ctx->remote_pid);
-			}
 		}
-		*status_r = -1;
 	}
-	alarm(0);
 }
 
 static void cmd_dsync_log_remote_status(int status, bool remote_errors_logged,
@@ -557,7 +555,7 @@
 	enum mail_error mail_error = 0, mail_error2;
 	bool remote_errors_logged = FALSE;
 	bool changes_during_sync = FALSE;
-	int status = 0, ret = 0;
+	int ret = 0;
 
 	memset(&set, 0, sizeof(set));
 	if (_ctx->cur_client_ip.family != 0) {
@@ -621,6 +619,7 @@
 	if (doveadm_debug)
 		brain_flags |= DSYNC_BRAIN_FLAG_DEBUG;
 
+	child_wait_init();
 	brain = dsync_brain_master_init(user, ibc, ctx->sync_type,
 					brain_flags, &set);
 
@@ -629,6 +628,8 @@
 					&changes_during_sync, &mail_error) < 0)
 			ret = -1;
 	} else {
+		ctx->child_wait = child_wait_new_with_pid(ctx->remote_pid,
+			cmd_dsync_remote_exited, ctx);
 		cmd_dsync_run_remote(user);
 	}
 
@@ -682,10 +683,11 @@
 	   shouldn't (at least with ssh) and we need stderr to be open to be
 	   able to print the final errors */
 	if (ctx->run_type == DSYNC_RUN_TYPE_CMD) {
-		cmd_dsync_wait_remote(ctx, &status);
+		cmd_dsync_wait_remote(ctx);
+		remote_error_input(ctx);
 		remote_errors_logged = ctx->err_stream->v_offset > 0;
 		i_stream_destroy(&ctx->err_stream);
-		cmd_dsync_log_remote_status(status, remote_errors_logged,
+		cmd_dsync_log_remote_status(ctx->exit_status, remote_errors_logged,
 					    ctx->remote_cmd_args);
 	} else {
 		i_assert(ctx->err_stream == NULL);
@@ -695,6 +697,9 @@
 	if (ctx->fd_err != -1)
 		i_close_fd(&ctx->fd_err);
 
+	if (ctx->child_wait != NULL)
+		child_wait_free(&ctx->child_wait);
+	child_wait_deinit();
 	return ret;
 }