Mercurial > dovecot > core-2.2
changeset 19674:fc0219628b49
dsync: Improved header hash v2 algorithm to remove repeated '?' chars.
This helps with Yahoo, which replaces each UTF-8 character in headers with a
single '?' (instead of one '?' per 8-bit byte).
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Thu, 28 Jan 2016 20:47:02 +0200 |
parents | f22a6d0198c4 |
children | 364874711d5b |
files | src/doveadm/dsync/Makefile.am src/doveadm/dsync/dsync-mail.c src/doveadm/dsync/dsync-mail.h src/doveadm/dsync/test-dsync-mail.c |
diffstat | 4 files changed, 62 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/src/doveadm/dsync/Makefile.am Thu Jan 28 20:23:51 2016 +0200 +++ b/src/doveadm/dsync/Makefile.am Thu Jan 28 20:47:02 2016 +0200 @@ -58,6 +58,7 @@ dsync-transaction-log-scan.h test_programs = \ + test-dsync-mail \ test-dsync-mailbox-tree-sync noinst_PROGRAMS = $(test_programs) @@ -66,6 +67,10 @@ ../../lib-test/libtest.la \ ../../lib/liblib.la +test_dsync_mail_SOURCES = test-dsync-mail.c +test_dsync_mail_LDADD = $(pkglib_LTLIBRARIES) $(test_libs) +test_dsync_mail_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs) + test_dsync_mailbox_tree_sync_SOURCES = test-dsync-mailbox-tree-sync.c test_dsync_mailbox_tree_sync_LDADD = dsync-mailbox-tree-sync.lo dsync-mailbox-tree.lo $(test_libs) test_dsync_mailbox_tree_sync_DEPENDENCIES = $(pkglib_LTLIBRARIES) $(test_libs)
--- a/src/doveadm/dsync/dsync-mail.c Thu Jan 28 20:23:51 2016 +0200 +++ b/src/doveadm/dsync/dsync-mail.c Thu Jan 28 20:47:02 2016 +0200 @@ -24,9 +24,8 @@ return mailbox_header_lookup_init(box, hashed_headers); } -static void -dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version, - const unsigned char *data, size_t size) +void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version, + const unsigned char *data, size_t size) { size_t i, start; @@ -42,18 +41,22 @@ - Zimbra replaces 8bit chars with '?' in header fetches, but not body fetches. - Yahoo replaces 8bit chars with '?' in partial header - fetches, but not POP3 TOP. + fetches, but not POP3 TOP. UTF-8 character sequence writes only a + single '?' - So we'll just replace all control and 8bit chars with '?', - which hopefully will satisfy everybody. + So we'll just replace all control and 8bit chars with '?' and + remove any repeated '?', which hopefully will satisfy everybody. (Keep this code in sync with pop3-migration plugin.) */ for (i = start = 0; i < size; i++) { - if ((data[i] < 0x20 || data[i] >= 0x80) && + if ((data[i] < 0x20 || data[i] >= 0x7f || data[i] == '?') && (data[i] != '\t' && data[i] != '\n')) { - md5_update(md5_ctx, data + start, i-start); - md5_update(md5_ctx, "?", 1); + /* remove repeated '?' */ + if (start < i || i == 0) { + md5_update(md5_ctx, data + start, i-start); + md5_update(md5_ctx, "?", 1); + } start = i+1; } }
--- a/src/doveadm/dsync/dsync-mail.h Thu Jan 28 20:23:51 2016 +0200 +++ b/src/doveadm/dsync/dsync-mail.h Thu Jan 28 20:47:02 2016 +0200 @@ -3,6 +3,7 @@ #include "mail-types.h" +struct md5_context; struct mail; struct mailbox; @@ -95,4 +96,8 @@ void dsync_mail_change_dup(pool_t pool, const struct dsync_mail_change *src, struct dsync_mail_change *dest_r); +/* private: */ +void dsync_mail_hash_more(struct md5_context *md5_ctx, unsigned int version, + const unsigned char *data, size_t size); + #endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/doveadm/dsync/test-dsync-mail.c Thu Jan 28 20:47:02 2016 +0200 @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "md5.h" +#include "dsync-mail.h" +#include "test-common.h" + +static const unsigned char test_input[] = + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20!?x??yz\x7f\x80\x90\xff-plop\xff"; +static const unsigned char test_output[] = + "?\t\n? !?x?yz?-plop?"; + +static void test_dsync_mail_hash_more(void) +{ + struct md5_context md5_ctx; + unsigned char md5_input[MD5_RESULTLEN], md5_output[MD5_RESULTLEN]; + + test_begin("dsync_mail_hash_more v2"); + md5_init(&md5_ctx); + dsync_mail_hash_more(&md5_ctx, 2, test_input, sizeof(test_input)-1); + md5_final(&md5_ctx, md5_input); + + md5_init(&md5_ctx); + md5_update(&md5_ctx, test_output, sizeof(test_output)-1); + md5_final(&md5_ctx, md5_output); + + test_assert(memcmp(md5_input, md5_output, MD5_RESULTLEN) == 0); + test_end(); +} + +int main(void) +{ + static void (*test_functions[])(void) = { + test_dsync_mail_hash_more, + NULL + }; + return test_run(test_functions); +}