# HG changeset patch
# User Timo Sirainen
# Date 1454006822 -7200
# Node ID fc0219628b4909e2a3952850fb35334749eab1a8
# Parent f22a6d0198c4f5bbc9ceaea0b7bea6221d81afc1
dsync: Improved header hash v2 algorithm to remove repeated '?' chars.
This is to help with Yahoo that replaces UTF-8 chars in headers with a
single '?' (instead of '?' per each 8bit byte).

diff -r f22a6d0198c4 -r fc0219628b49 src/doveadm/dsync/Makefile.am
--- a/src/doveadm/dsync/Makefile.am	Thu Jan 28 20:23:51 2016 +0200
+++ b/src/doveadm/dsync/Makefile.am	Thu Jan 28 20:47:02 2016 +0200
@@ -58,6 +58,7 @@
 	dsync-transaction-log-scan.h
 
 test_programs = \
+	test-dsync-mail \
 	test-dsync-mailbox-tree-sync
 
 noinst_PROGRAMS = $(test_programs)
@@ -66,6 +67,10 @@
 	../../lib-test/libtest.la \
 	../../lib/liblib.la
 
+test_dsync_mail_SOURCES = test-dsync-mail.c
+test_dsync_mail_LDADD = $(pkglib_LTLIBRARIES) $(test_libs)
+test_dsync_mail_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs)
+
 test_dsync_mailbox_tree_sync_SOURCES = test-dsync-mailbox-tree-sync.c
 test_dsync_mailbox_tree_sync_LDADD = dsync-mailbox-tree-sync.lo dsync-mailbox-tree.lo $(test_libs)
 test_dsync_mailbox_tree_sync_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs)
diff -r f22a6d0198c4 -r fc0219628b49 src/doveadm/dsync/dsync-mail.c
--- a/src/doveadm/dsync/dsync-mail.c	Thu Jan 28 20:23:51 2016 +0200
+++ b/src/doveadm/dsync/dsync-mail.c	Thu Jan 28 20:47:02 2016 +0200
@@ -24,9 +24,8 @@
 	return mailbox_header_lookup_init(box, hashed_headers);
 }
 
-static void
-dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
-		     const unsigned char *data, size_t size)
+void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+			  const unsigned char *data, size_t size)
 {
 	size_t i, start;
 
@@ -42,18 +41,22 @@
 	   - Zimbra replaces 8bit chars with '?' in header fetches,
 	   but not body fetches.
 	   - Yahoo replaces 8bit chars with '?' in partial header
-	   fetches, but not POP3 TOP.
+	   fetches, but not POP3 TOP. UTF-8 character sequence writes only a
+	   single '?'
 
-	   So we'll just replace all control and 8bit chars with '?',
-	   which hopefully will satisfy everybody.
+	   So we'll just replace all control and 8bit chars with '?' and
+	   remove any repeated '?', which hopefully will satisfy everybody.
 
 	   (Keep this code in sync with pop3-migration plugin.)
 	   */
 	for (i = start = 0; i < size; i++) {
-		if ((data[i] < 0x20 || data[i] >= 0x80) &&
+		if ((data[i] < 0x20 || data[i] >= 0x7f || data[i] == '?') &&
 		    (data[i] != '\t' && data[i] != '\n')) {
-			md5_update(md5_ctx, data + start, i-start);
-			md5_update(md5_ctx, "?", 1);
+			/* remove repeated '?': emit one only if the previous byte was kept, or at byte 0 */
+			if (start < i || i == 0) {
+				md5_update(md5_ctx, data + start, i-start);
+				md5_update(md5_ctx, "?", 1);
+			}
 			start = i+1;
 		}
 	}
diff -r f22a6d0198c4 -r fc0219628b49 src/doveadm/dsync/dsync-mail.h
--- a/src/doveadm/dsync/dsync-mail.h	Thu Jan 28 20:23:51 2016 +0200
+++ b/src/doveadm/dsync/dsync-mail.h	Thu Jan 28 20:47:02 2016 +0200
@@ -3,6 +3,7 @@
 
 #include "mail-types.h"
 
+struct md5_context;
 struct mail;
 struct mailbox;
 
@@ -95,4 +96,8 @@
 void dsync_mail_change_dup(pool_t pool, const struct dsync_mail_change *src,
 			   struct dsync_mail_change *dest_r);
 
+/* private: called directly by test-dsync-mail.c */
+void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version,
+			  const unsigned char *data, size_t size);
+
 #endif
diff -r f22a6d0198c4 -r fc0219628b49 src/doveadm/dsync/test-dsync-mail.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/doveadm/dsync/test-dsync-mail.c	Thu Jan 28 20:47:02 2016 +0200
@@ -0,0 +1,40 @@
+/* Copyright (c) 2016 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "md5.h"
+#include "dsync-mail.h"
+#include "test-common.h"
+
+static const unsigned char test_input[] =
+	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+	"\x20!?x??yz\x7f\x80\x90\xff-plop\xff";
+static const unsigned char test_output[] =
+	"?\t\n? !?x?yz?-plop?";
+
+static void test_dsync_mail_hash_more(void)
+{
+	struct md5_context md5_ctx;
+	unsigned char md5_input[MD5_RESULTLEN], md5_output[MD5_RESULTLEN];
+
+	test_begin("dsync_mail_hash_more v2");
+	md5_init(&md5_ctx);
+	dsync_mail_hash_more(&md5_ctx, 2, test_input, sizeof(test_input)-1);
+	md5_final(&md5_ctx, md5_input);
+
+	md5_init(&md5_ctx);
+	md5_update(&md5_ctx, test_output, sizeof(test_output)-1);
+	md5_final(&md5_ctx, md5_output);
+
+	test_assert(memcmp(md5_input, md5_output, MD5_RESULTLEN) == 0);
+	test_end();
+}
+
+int main(void)
+{
+	static void (*test_functions[])(void) = {
+		test_dsync_mail_hash_more,
+		NULL
+	};
+	return test_run(test_functions);
+}