changeset 19646:25f06710e671

dsync: When comparing headers' hashes to match messages, try to normalize the input. This is especially useful because some IMAP servers return different data depending on whether we're fetching only specific header fields, all headers or entire body. For now we're assuming that any non-ASCII is going to be replaced with '?', which helps at least with Zimbra and Yahoo. The header hashing algorithm is now versionable, so it can be modified more easily in future. This change should make imapc_features=zimbra-workarounds setting obsolete.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Tue, 26 Jan 2016 19:56:43 +0200
parents a1cd3f372251
children 8f2ba9ebc463
files src/doveadm/dsync/dsync-brain-mailbox.c src/doveadm/dsync/dsync-brain-private.h src/doveadm/dsync/dsync-brain.c src/doveadm/dsync/dsync-ibc-stream.c src/doveadm/dsync/dsync-ibc.h src/doveadm/dsync/dsync-mail.c src/doveadm/dsync/dsync-mail.h src/doveadm/dsync/dsync-mailbox-export.c src/doveadm/dsync/dsync-mailbox-export.h src/doveadm/dsync/dsync-mailbox-import.c src/doveadm/dsync/dsync-mailbox-import.h src/plugins/pop3-migration/pop3-migration-plugin.c
diffstat 12 files changed, 71 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/doveadm/dsync/dsync-brain-mailbox.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-brain-mailbox.c	Tue Jan 26 19:56:43 2016 +0200
@@ -218,6 +218,8 @@
 		import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128;
 	if (brain->no_notify)
 		import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY;
+	if (brain->hdr_hash_v2)
+		import_flags |= DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2;
 
 	brain->box_importer = brain->backup_send ? NULL :
 		dsync_mailbox_import_init(brain->box, brain->virtual_all_box,
@@ -318,6 +320,8 @@
 		exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL;
 	if (brain->sync_since_timestamp > 0)
 		exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS;
+	if (brain->hdr_hash_v2)
+		exporter_flags |= DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2;
 
 	brain->box_exporter = brain->backup_recv ? NULL :
 		dsync_mailbox_export_init(brain->box, brain->log_scan,
--- a/src/doveadm/dsync/dsync-brain-private.h	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-brain-private.h	Tue Jan 26 19:56:43 2016 +0200
@@ -112,6 +112,7 @@
 	unsigned int require_full_resync:1;
 	unsigned int verbose_proctitle:1;
 	unsigned int no_notify:1;
+	unsigned int hdr_hash_v2:1;
 	unsigned int failed:1;
 };
 
--- a/src/doveadm/dsync/dsync-brain.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-brain.c	Tue Jan 26 19:56:43 2016 +0200
@@ -233,6 +233,7 @@
 	memcpy(ibc_set.sync_box_guid, set->sync_box_guid,
 	       sizeof(ibc_set.sync_box_guid));
 	ibc_set.sync_type = sync_type;
+	ibc_set.hdr_hash_v2 = TRUE;
 	ibc_set.lock_timeout = set->lock_timeout_secs;
 	/* reverse the backup direction for the slave */
 	ibc_set.brain_flags = flags & ~(DSYNC_BRAIN_FLAG_BACKUP_SEND |
@@ -267,6 +268,7 @@
 	}
 
 	memset(&ibc_set, 0, sizeof(ibc_set));
+	ibc_set.hdr_hash_v2 = TRUE;
 	ibc_set.hostname = my_hostdomain();
 	dsync_ibc_send_handshake(ibc, &ibc_set);
 
@@ -430,6 +432,7 @@
 			return FALSE;
 		}
 	}
+	brain->hdr_hash_v2 = ibc_set->hdr_hash_v2;
 
 	brain->state = brain->sync_type == DSYNC_BRAIN_SYNC_TYPE_STATE ?
 		DSYNC_STATE_MASTER_SEND_LAST_COMMON :
@@ -447,6 +450,7 @@
 
 	if (dsync_ibc_recv_handshake(brain->ibc, &ibc_set) == 0)
 		return FALSE;
+	brain->hdr_hash_v2 = ibc_set->hdr_hash_v2;
 
 	if (ibc_set->lock_timeout > 0) {
 		brain->lock_timeout = ibc_set->lock_timeout;
--- a/src/doveadm/dsync/dsync-ibc-stream.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-ibc-stream.c	Tue Jan 26 19:56:43 2016 +0200
@@ -26,12 +26,13 @@
 #define DSYNC_IBC_STREAM_OUTBUF_THROTTLE_SIZE (1024*128)
 
 #define DSYNC_PROTOCOL_VERSION_MAJOR 3
-#define DSYNC_PROTOCOL_VERSION_MINOR 3
-#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t3\n"
+#define DSYNC_PROTOCOL_VERSION_MINOR 4
+#define DSYNC_HANDSHAKE_VERSION "VERSION\tdsync\t3\t4\n"
 
 #define DSYNC_PROTOCOL_MINOR_HAVE_ATTRIBUTES 1
 #define DSYNC_PROTOCOL_MINOR_HAVE_SAVE_GUID 2
 #define DSYNC_PROTOCOL_MINOR_HAVE_FINISH 3
+#define DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2 4
 
 enum item_type {
 	ITEM_NONE,
@@ -826,6 +827,7 @@
 		set->brain_flags |= DSYNC_BRAIN_FLAG_PURGE_REMOTE;
 	if (dsync_deserializer_decode_try(decoder, "no_notify", &value))
 		set->brain_flags |= DSYNC_BRAIN_FLAG_NO_NOTIFY;
+	set->hdr_hash_v2 = ibc->minor_version >= DSYNC_PROTOCOL_MINOR_HAVE_HDR_HASH_V2;
 
 	*set_r = set;
 	return DSYNC_IBC_RECV_RET_OK;
--- a/src/doveadm/dsync/dsync-ibc.h	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-ibc.h	Tue Jan 26 19:56:43 2016 +0200
@@ -63,6 +63,7 @@
 
 	enum dsync_brain_sync_type sync_type;
 	enum dsync_brain_flags brain_flags;
+	bool hdr_hash_v2;
 	unsigned int lock_timeout;
 };
 
--- a/src/doveadm/dsync/dsync-mail.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mail.c	Tue Jan 26 19:56:43 2016 +0200
@@ -24,7 +24,44 @@
 	return mailbox_header_lookup_init(box, hashed_headers);
 }
 
-int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r)
+static void
+dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+		     const unsigned char *data, size_t size)
+{
+	size_t i, start;
+
+	i_assert(version == 1 || version == 2);
+
+	if (version == 1) {
+		md5_update(md5_ctx, data, size);
+		return;
+	}
+	/* Version 2: normalize the data before hashing it, because:
+	   - Dovecot IMAP and Dovecot POP3 (with the outlook-no-nuls
+	   workaround) replace NULs with the 0x80 character.
+	   - Zimbra replaces 8bit chars with '?' in header fetches,
+	   but not body fetches.
+	   - Yahoo replaces 8bit chars with '?' in partial header
+	   fetches, but not POP3 TOP.
+
+	   So we'll hash a '?' in place of every control char (other
+	   than TAB and LF) and every 8bit char, hashing the rest as is,
+	   which hopefully will satisfy everybody.
+	   (Keep this code in sync with pop3-migration plugin.)
+	   */
+	for (i = start = 0; i < size; i++) {
+		if ((data[i] < 0x20 || data[i] >= 0x80) &&
+		    (data[i] != '\t' && data[i] != '\n')) {
+			md5_update(md5_ctx, data + start, i-start);
+			md5_update(md5_ctx, "?", 1);
+			start = i+1;
+		}
+	}
+	md5_update(md5_ctx, data + start, i-start);
+}
+
+int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version,
+			    const char **hdr_hash_r)
 {
 	struct istream *hdr_input, *input;
 	struct mailbox_header_lookup_ctx *hdr_ctx;
@@ -48,7 +85,7 @@
 			break;
 		if (size == 0)
 			break;
-		md5_update(&md5_ctx, data, size);
+		dsync_mail_hash_more(&md5_ctx, version, data, size);
 		i_stream_skip(input, size);
 	}
 	if (input->stream_errno != 0)
--- a/src/doveadm/dsync/dsync-mail.h	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mail.h	Tue Jan 26 19:56:43 2016 +0200
@@ -85,7 +85,8 @@
 struct mailbox_header_lookup_ctx *
 dsync_mail_get_hash_headers(struct mailbox *box);
 
-int dsync_mail_get_hdr_hash(struct mail *mail, const char **hdr_hash_r);
+int dsync_mail_get_hdr_hash(struct mail *mail, unsigned int version,
+			    const char **hdr_hash_r);
 int dsync_mail_fill(struct mail *mail, bool minimal_fill,
 		    struct dsync_mail *dmail_r, const char **error_field_r);
 int dsync_mail_fill_nonminimal(struct mail *mail, struct dsync_mail *dmail_r,
--- a/src/doveadm/dsync/dsync-mailbox-export.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mailbox-export.c	Tue Jan 26 19:56:43 2016 +0200
@@ -28,6 +28,7 @@
 	struct mailbox_transaction_context *trans;
 	struct mail_search_context *search_ctx;
 	unsigned int search_pos, search_count;
+	unsigned int hdr_hash_version;
 
 	/* GUID => instances */
 	HASH_TABLE(char *, struct dsync_mail_guid_instances *) export_guids;
@@ -162,7 +163,7 @@
 
 	if (!exporter->mails_have_guids) {
 		/* get header hash also */
-		if (dsync_mail_get_hdr_hash(mail, hdr_hash_r) < 0)
+		if (dsync_mail_get_hdr_hash(mail, exporter->hdr_hash_version, hdr_hash_r) < 0)
 			return dsync_mail_error(exporter, mail, "hdr-stream");
 		return 1;
 	} else if (**guid_r == '\0') {
@@ -502,6 +503,8 @@
 		(flags & DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL) != 0;
 	exporter->export_received_timestamps =
 		(flags & DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS) != 0;
+	exporter->hdr_hash_version =
+		(flags & DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2) ? 2 : 1;
 	p_array_init(&exporter->requested_uids, pool, 16);
 	p_array_init(&exporter->search_uids, pool, 16);
 	hash_table_create(&exporter->export_guids, pool, 0, str_hash, strcmp);
--- a/src/doveadm/dsync/dsync-mailbox-export.h	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mailbox-export.h	Tue Jan 26 19:56:43 2016 +0200
@@ -5,7 +5,8 @@
 	DSYNC_MAILBOX_EXPORTER_FLAG_AUTO_EXPORT_MAILS	= 0x01,
 	DSYNC_MAILBOX_EXPORTER_FLAG_MAILS_HAVE_GUIDS	= 0x02,
 	DSYNC_MAILBOX_EXPORTER_FLAG_MINIMAL_DMAIL_FILL	= 0x04,
-	DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS		= 0x08
+	DSYNC_MAILBOX_EXPORTER_FLAG_TIMESTAMPS		= 0x08,
+	DSYNC_MAILBOX_EXPORTER_FLAG_HDR_HASH_V2		= 0x10
 };
 
 struct dsync_mailbox_exporter *
--- a/src/doveadm/dsync/dsync-mailbox-import.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mailbox-import.c	Tue Jan 26 19:56:43 2016 +0200
@@ -63,6 +63,7 @@
 	uint64_t remote_highest_modseq, remote_highest_pvt_modseq;
 	time_t sync_since_timestamp;
 	enum mailbox_transaction_flags transaction_flags;
+	unsigned int hdr_hash_version;
 
 	enum mail_flags sync_flag;
 	const char *sync_keyword;
@@ -255,6 +256,8 @@
 		(flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS) != 0;
 	importer->mails_use_guid128 =
 		(flags & DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128) != 0;
+	importer->hdr_hash_version =
+		(flags & DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2) != 0 ? 2 : 1;
 
 	mailbox_get_open_status(importer->box, STATUS_UIDNEXT |
 				STATUS_HIGHESTMODSEQ | STATUS_HIGHESTPVTMODSEQ,
@@ -601,6 +604,7 @@
 		}
 	} else {
 		if (dsync_mail_get_hdr_hash(importer->cur_mail,
+					    importer->hdr_hash_version,
 					    &hdr_hash) < 0) {
 			dsync_mail_error(importer, importer->cur_mail,
 					 "header hash");
@@ -1483,7 +1487,8 @@
 		return -1;
 	}
 
-	if (dsync_mail_get_hdr_hash(importer->cur_mail, &hdr_hash) < 0) {
+	if (dsync_mail_get_hdr_hash(importer->cur_mail,
+				    importer->hdr_hash_version, &hdr_hash) < 0) {
 		dsync_mail_error(importer, importer->cur_mail, "hdr-stream");
 		*result_r = "Error fetching header stream";
 		return -1;
--- a/src/doveadm/dsync/dsync-mailbox-import.h	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/doveadm/dsync/dsync-mailbox-import.h	Tue Jan 26 19:56:43 2016 +0200
@@ -10,7 +10,8 @@
 	DSYNC_MAILBOX_IMPORT_FLAG_DEBUG			= 0x08,
 	DSYNC_MAILBOX_IMPORT_FLAG_MAILS_HAVE_GUIDS	= 0x10,
 	DSYNC_MAILBOX_IMPORT_FLAG_MAILS_USE_GUID128	= 0x20,
-	DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY		= 0x40
+	DSYNC_MAILBOX_IMPORT_FLAG_NO_NOTIFY		= 0x40,
+	DSYNC_MAILBOX_IMPORT_FLAG_HDR_HASH_V2		= 0x80
 };
 
 struct mailbox;
--- a/src/plugins/pop3-migration/pop3-migration-plugin.c	Tue Jan 26 17:55:10 2016 +0200
+++ b/src/plugins/pop3-migration/pop3-migration-plugin.c	Tue Jan 26 19:56:43 2016 +0200
@@ -207,6 +207,8 @@
 
 		   So we'll just replace all control and 8bit chars with '?',
 		   which hopefully will satisfy everybody.
+
+		   (Keep this code in sync with dsync.)
 		*/
 		for (i = start = 0; i < size; i++) {
 			if ((data[i] < 0x20 || data[i] >= 0x80) &&