Mercurial > dovecot > core-2.2
changeset 15053:c976a9c01613
Replaced "decomposed titlecase" conversions with more generic normalizer function.
Plugins can now change mail_user.default_normalizer. Specific searches can
also use different normalizers by changing mail_search_context.normalizer.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 15 Sep 2012 03:12:20 +0300 |
parents | d5ebec837bfd |
children | 07ac1dbcc033 |
files | src/doveadm/doveadm-mail-fetch.c src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h src/lib-imap/imap-base-subject.c src/lib-mail/message-decoder.c src/lib-mail/message-decoder.h src/lib-mail/message-header-decode.c src/lib-mail/message-header-decode.h src/lib-mail/message-search.c src/lib-mail/message-search.h src/lib-mail/test-message-decoder.c src/lib-mail/test-message-header-decode.c src/lib-storage/index/index-search.c src/lib-storage/mail-storage-private.h src/lib-storage/mail-user.c src/lib-storage/mail-user.h src/lib/unichar.h src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts/fts-api-private.h src/plugins/fts/fts-api.c src/plugins/fts/fts-build-mail.c |
diffstat | 22 files changed, 125 insertions(+), 112 deletions(-) [+] |
line wrap: on
line diff
--- a/src/doveadm/doveadm-mail-fetch.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/doveadm/doveadm-mail-fetch.c Sat Sep 15 03:12:20 2012 +0300 @@ -265,7 +265,7 @@ parser = message_parser_init(pool_datastack_create(), input, MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, 0); - decoder = message_decoder_init(0); + decoder = message_decoder_init(NULL, 0); while ((ret = message_parser_parse_next_block(parser, &raw_block)) > 0) { if (!message_decoder_decode_next_block(decoder, &raw_block,
--- a/src/lib-charset/charset-iconv.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-charset/charset-iconv.c Sat Sep 15 03:12:20 2012 +0300 @@ -12,10 +12,10 @@ struct charset_translation { iconv_t cd; - enum charset_flags flags; + normalizer_func_t *normalizer; }; -int charset_to_utf8_begin(const char *charset, enum charset_flags flags, +int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer, struct charset_translation **t_r) { struct charset_translation *t; @@ -31,7 +31,7 @@ t = i_new(struct charset_translation, 1); t->cd = cd; - t->flags = flags; + t->normalizer = normalizer; *t_r = t; return 0; } @@ -54,12 +54,12 @@ } static int -charset_append_utf8(const void *src, size_t src_size, - buffer_t *dest, bool dtcase) +charset_append_utf8(struct charset_translation *t, + const void *src, size_t src_size, buffer_t *dest) { - if (dtcase) - return uni_utf8_to_decomposed_titlecase(src, src_size, dest); - if (!uni_utf8_get_valid_data(src, src_size, dest)) + if (t->normalizer != NULL) + return t->normalizer(src, src_size, dest); + else if (!uni_utf8_get_valid_data(src, src_size, dest)) return -1; else { buffer_append(dest, src, src_size); @@ -75,12 +75,11 @@ ICONV_CONST char *ic_srcbuf; char tmpbuf[8192], *ic_destbuf; size_t srcleft, destleft; - bool dtcase = (t->flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0; bool ret = TRUE; if (t->cd == (iconv_t)-1) { /* input is already supposed to be UTF-8 */ - if (charset_append_utf8(src, *src_size, dest, dtcase) < 0) + if (charset_append_utf8(t, src, *src_size, dest) < 0) *result = CHARSET_RET_INVALID_INPUT; else *result = CHARSET_RET_OK; @@ -110,8 +109,8 @@ /* we just converted data to UTF-8. it shouldn't be invalid, but Solaris iconv appears to pass invalid data through sometimes (e.g. 
8 bit characters with UTF-7) */ - if (charset_append_utf8(tmpbuf, sizeof(tmpbuf) - destleft, - dest, dtcase) < 0) + if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft, + dest) < 0) *result = CHARSET_RET_INVALID_INPUT; return ret; }
--- a/src/lib-charset/charset-utf8.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-charset/charset-utf8.c Sat Sep 15 03:12:20 2012 +0300 @@ -16,14 +16,14 @@ strcasecmp(charset, "UTF8") == 0; } -int charset_to_utf8_str(const char *charset, enum charset_flags flags, +int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer, const char *input, string_t *output, enum charset_result *result_r) { struct charset_translation *t; size_t len = strlen(input); - if (charset_to_utf8_begin(charset, flags, &t) < 0) + if (charset_to_utf8_begin(charset, normalizer, &t) < 0) return -1; *result_r = charset_to_utf8(t, (const unsigned char *)input, @@ -35,31 +35,31 @@ #ifndef HAVE_ICONV struct charset_translation { - enum charset_flags flags; + normalizer_func_t *normalizer; }; -static struct charset_translation raw_translation = { 0 }; -static struct charset_translation tc_translation = { - CHARSET_FLAG_DECOMP_TITLECASE -}; - -int charset_to_utf8_begin(const char *charset, enum charset_flags flags, +int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer, struct charset_translation **t_r) { - if (charset_is_utf8(charset)) { - if ((flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0) - *t_r = &tc_translation; - else - *t_r = &raw_translation; - return 0; + struct charset_translation *t; + + if (!charset_is_utf8(charset)) { + /* no support for charsets that need translation */ + return -1; } - /* no support for charsets that need translation */ - return -1; + t = i_new(struct charset_translation, 1); + t->normalizer = normalizer; + *t_r = t; + return 0; } -void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED) +void charset_to_utf8_end(struct charset_translation **_t) { + struct charset_translation *t = *_t; + + *_t = NULL; + i_free(t); } void charset_to_utf8_reset(struct charset_translation *t ATTR_UNUSED) @@ -70,11 +70,13 @@ charset_to_utf8(struct charset_translation *t, const unsigned char *src, size_t *src_size, buffer_t *dest) { - if 
((t->flags & CHARSET_FLAG_DECOMP_TITLECASE) == 0) + if (t->normalizer != NULL) { + if (t->normalizer(src, *src_size, dest) < 0) + return CHARSET_RET_INVALID_INPUT; + } else if (!uni_utf8_get_valid_data(src, *src_size, dest)) { + return CHARSET_RET_INVALID_INPUT; + } else { buffer_append(dest, src, *src_size); - else { - if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0) - return CHARSET_RET_INVALID_INPUT; } return CHARSET_RET_OK; }
--- a/src/lib-charset/charset-utf8.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-charset/charset-utf8.h Sat Sep 15 03:12:20 2012 +0300 @@ -1,12 +1,9 @@ #ifndef CHARSET_UTF8_H #define CHARSET_UTF8_H -struct charset_translation; +#include "unichar.h" -enum charset_flags { - /* Translate the output to decomposed titlecase */ - CHARSET_FLAG_DECOMP_TITLECASE = 0x01 -}; +struct charset_translation; enum charset_result { CHARSET_RET_OK = 1, @@ -15,8 +12,9 @@ }; /* Begin translation to UTF-8. Returns -1 if charset is unknown. */ -int charset_to_utf8_begin(const char *charset, enum charset_flags flags, - struct charset_translation **t_r); +int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer, + struct charset_translation **t_r) + ATTR_NULL(2); void charset_to_utf8_end(struct charset_translation **t); void charset_to_utf8_reset(struct charset_translation *t); @@ -30,8 +28,8 @@ const unsigned char *src, size_t *src_size, buffer_t *dest); /* Translate a single string to UTF8. */ -int charset_to_utf8_str(const char *charset, enum charset_flags flags, +int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer, const char *input, string_t *output, - enum charset_result *result_r); + enum charset_result *result_r) ATTR_NULL(2); #endif
--- a/src/lib-imap/imap-base-subject.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-imap/imap-base-subject.c Sat Sep 15 03:12:20 2012 +0300 @@ -210,7 +210,7 @@ UTF-8. Convert all tabs and continuations to space. Convert all multiple spaces to a single space. */ message_header_decode_utf8((const unsigned char *)subject, subject_len, - buf, TRUE); + buf, uni_utf8_to_decomposed_titlecase); buffer_append_c(buf, '\0'); pack_whitespace(buf);
--- a/src/lib-mail/message-decoder.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-decoder.c Sat Sep 15 03:12:20 2012 +0300 @@ -22,6 +22,7 @@ struct message_decoder_context { enum message_decoder_flags flags; + normalizer_func_t *normalizer; struct message_part *prev_part; struct message_header_line hdr; @@ -46,12 +47,14 @@ struct message_part *part); struct message_decoder_context * -message_decoder_init(enum message_decoder_flags flags) +message_decoder_init(normalizer_func_t *normalizer, + enum message_decoder_flags flags) { struct message_decoder_context *ctx; ctx = i_new(struct message_decoder_context, 1); ctx->flags = flags; + ctx->normalizer = normalizer; ctx->buf = buffer_create_dynamic(default_pool, 8192); ctx->buf2 = buffer_create_dynamic(default_pool, 8192); ctx->encoding_buf = buffer_create_dynamic(default_pool, 128); @@ -149,7 +152,6 @@ struct message_header_line *hdr, struct message_block *output) { - bool dtcase = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0; size_t value_len; if (hdr->continues) { @@ -168,12 +170,11 @@ buffer_set_used_size(ctx->buf, 0); message_header_decode_utf8(hdr->full_value, hdr->full_value_len, - ctx->buf, dtcase); + ctx->buf, ctx->normalizer); value_len = ctx->buf->used; - if (dtcase) { - (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len, - ctx->buf); + if (ctx->normalizer != NULL) { + (void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf); buffer_append_c(ctx->buf, '\0'); } else { if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name, @@ -229,8 +230,6 @@ message_decode_body_init_charset(struct message_decoder_context *ctx, struct message_part *part) { - enum charset_flags flags; - ctx->binary_input = ctx->content_charset == NULL && (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 && (part->flags & (MESSAGE_PART_FLAG_TEXT | @@ -249,12 +248,10 @@ charset_to_utf8_end(&ctx->charset_trans); i_free_and_null(ctx->charset_trans_charset); - flags = (ctx->flags & 
MESSAGE_DECODER_FLAG_DTCASE) != 0 ? - CHARSET_FLAG_DECOMP_TITLECASE : 0; ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ? ctx->content_charset : "UTF-8"); - if (charset_to_utf8_begin(ctx->charset_trans_charset, - flags, &ctx->charset_trans) < 0) + if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer, + &ctx->charset_trans) < 0) ctx->charset_trans = NULL; } @@ -331,9 +328,8 @@ output->size = size; } else if (ctx->charset_utf8) { buffer_set_used_size(ctx->buf2, 0); - if ((ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0) { - (void)uni_utf8_to_decomposed_titlecase(data, size, - ctx->buf2); + if (ctx->normalizer != NULL) { + (void)ctx->normalizer(data, size, ctx->buf2); output->data = ctx->buf2->data; output->size = ctx->buf2->used; } else if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
--- a/src/lib-mail/message-decoder.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-decoder.h Sat Sep 15 03:12:20 2012 +0300 @@ -1,6 +1,8 @@ #ifndef MESSAGE_DECODER_H #define MESSAGE_DECODER_H +#include "unichar.h" + struct message_header_line; enum message_cte { @@ -12,9 +14,6 @@ }; enum message_decoder_flags { - /* Return all headers and parts through - uni_utf8_to_decomposed_titlecase() */ - MESSAGE_DECODER_FLAG_DTCASE = 0x01, /* Return binary MIME parts as-is without any conversion. */ MESSAGE_DECODER_FLAG_RETURN_BINARY = 0x02 }; @@ -24,7 +23,8 @@ /* Decode message's contents as UTF-8, both the headers and the MIME bodies. The bodies are decoded from quoted-printable and base64 formats if needed. */ struct message_decoder_context * -message_decoder_init(enum message_decoder_flags flags); +message_decoder_init(normalizer_func_t *normalizer, + enum message_decoder_flags flags); void message_decoder_deinit(struct message_decoder_context **ctx); /* Change the MESSAGE_DECODER_FLAG_RETURN_BINARY flag */
--- a/src/lib-mail/message-header-decode.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-header-decode.c Sat Sep 15 03:12:20 2012 +0300 @@ -135,8 +135,8 @@ struct decode_utf8_context { buffer_t *dest; + normalizer_func_t *normalizer; unsigned int changed:1; - unsigned int dtcase:1; }; static bool @@ -145,13 +145,11 @@ { struct decode_utf8_context *ctx = context; struct charset_translation *t; - enum charset_flags flags; if (charset == NULL || charset_is_utf8(charset)) { /* ASCII / UTF-8 */ - if (ctx->dtcase) { - (void)uni_utf8_to_decomposed_titlecase(data, size, - ctx->dest); + if (ctx->normalizer != NULL) { + (void)ctx->normalizer(data, size, ctx->dest); } else { if (uni_utf8_get_valid_data(data, size, ctx->dest)) buffer_append(ctx->dest, data, size); @@ -159,8 +157,7 @@ return TRUE; } - flags = ctx->dtcase ? CHARSET_FLAG_DECOMP_TITLECASE : 0; - if (charset_to_utf8_begin(charset, flags, &t) < 0) { + if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) { /* data probably still contains some valid ASCII characters. append them. */ if (uni_utf8_get_valid_data(data, size, ctx->dest)) @@ -175,12 +172,12 @@ } void message_header_decode_utf8(const unsigned char *data, size_t size, - buffer_t *dest, bool dtcase) + buffer_t *dest, normalizer_func_t *normalizer) { struct decode_utf8_context ctx; memset(&ctx, 0, sizeof(ctx)); ctx.dest = dest; - ctx.dtcase = dtcase; + ctx.normalizer = normalizer; message_header_decode(data, size, decode_utf8_callback, &ctx); }
--- a/src/lib-mail/message-header-decode.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-header-decode.h Sat Sep 15 03:12:20 2012 +0300 @@ -1,6 +1,8 @@ #ifndef MESSAGE_HEADER_DECODE_H #define MESSAGE_HEADER_DECODE_H +#include "unichar.h" + /* Return FALSE if you wish to stop decoding. charset is NULL when it's not RFC2047-encoded. */ typedef bool message_header_decode_callback_t(const unsigned char *data, @@ -13,9 +15,8 @@ message_header_decode_callback_t *callback, void *context); -/* Append decoded RFC2047 header as UTF-8 to given buffer. If dtcase=TRUE, - the header is appended through uni_utf8_to_decomposed_titlecase(). */ +/* Append decoded RFC2047 header as UTF-8 to given buffer. */ void message_header_decode_utf8(const unsigned char *data, size_t size, - buffer_t *dest, bool dtcase); + buffer_t *dest, normalizer_func_t *normalizer); #endif
--- a/src/lib-mail/message-search.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-search.c Sat Sep 15 03:12:20 2012 +0300 @@ -12,6 +12,8 @@ struct message_search_context { enum message_search_flags flags; + normalizer_func_t *normalizer; + struct str_find_context *str_find_ctx; struct message_part *prev_part; @@ -20,21 +22,18 @@ }; struct message_search_context * -message_search_init(const char *key_utf8, +message_search_init(const char *normalized_key_utf8, + normalizer_func_t *normalizer, enum message_search_flags flags) { - enum message_decoder_flags decoder_flags = 0; struct message_search_context *ctx; - i_assert(*key_utf8 != '\0'); - - if ((flags & MESSAGE_SEARCH_FLAG_DTCASE) != 0) - decoder_flags |= MESSAGE_DECODER_FLAG_DTCASE; + i_assert(*normalized_key_utf8 != '\0'); ctx = i_new(struct message_search_context, 1); ctx->flags = flags; - ctx->decoder = message_decoder_init(decoder_flags); - ctx->str_find_ctx = str_find_init(default_pool, key_utf8); + ctx->decoder = message_decoder_init(normalizer, 0); + ctx->str_find_ctx = str_find_init(default_pool, normalized_key_utf8); return ctx; }
--- a/src/lib-mail/message-search.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/message-search.h Sat Sep 15 03:12:20 2012 +0300 @@ -7,15 +7,13 @@ enum message_search_flags { /* Skip the main header and all the MIME headers. */ - MESSAGE_SEARCH_FLAG_SKIP_HEADERS = 0x01, - /* Search with decomposed titlecase (instead of exact case matching). - The search key must be given with dtcase also. */ - MESSAGE_SEARCH_FLAG_DTCASE = 0x02 + MESSAGE_SEARCH_FLAG_SKIP_HEADERS = 0x01 }; /* The key must be given in UTF-8 charset */ struct message_search_context * -message_search_init(const char *key_utf8, +message_search_init(const char *normalized_key_utf8, + normalizer_func_t *normalizer, enum message_search_flags flags); void message_search_deinit(struct message_search_context **ctx);
--- a/src/lib-mail/test-message-decoder.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/test-message-decoder.c Sat Sep 15 03:12:20 2012 +0300 @@ -10,7 +10,8 @@ #include "test-common.h" void message_header_decode_utf8(const unsigned char *data, size_t size, - buffer_t *dest, bool dtcase ATTR_UNUSED) + buffer_t *dest, + normalizer_func_t *normalizer ATTR_UNUSED) { buffer_append(dest, data, size); } @@ -25,7 +26,7 @@ } int charset_to_utf8_begin(const char *charset ATTR_UNUSED, - enum charset_flags flags ATTR_UNUSED, + normalizer_func_t *normalizer ATTR_UNUSED, struct charset_translation **t_r) { *t_r = NULL; @@ -56,7 +57,7 @@ memset(&output, 0, sizeof(output)); input.part = &part; - ctx = message_decoder_init(0); + ctx = message_decoder_init(NULL, 0); memset(&hdr, 0, sizeof(hdr)); hdr.name = "Content-Transfer-Encoding";
--- a/src/lib-mail/test-message-header-decode.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-mail/test-message-header-decode.c Sat Sep 15 03:12:20 2012 +0300 @@ -10,7 +10,7 @@ bool charset_is_utf8(const char *charset ATTR_UNUSED) { return TRUE; } int charset_to_utf8_begin(const char *charset ATTR_UNUSED, - enum charset_flags flags ATTR_UNUSED, + normalizer_func_t *normalizer ATTR_UNUSED, struct charset_translation **t_r ATTR_UNUSED) { return 0; } void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED) {}
--- a/src/lib-storage/index/index-search.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-storage/index/index-search.c Sat Sep 15 03:12:20 2012 +0300 @@ -38,6 +38,7 @@ #define SEARCH_RECALC_MIN_USECS 50000 struct search_header_context { + struct index_search_context *index_ctx; struct index_mail *imail; struct mail_search_arg *args; @@ -396,16 +397,16 @@ } static struct message_search_context * -msg_search_arg_context(struct mail_search_arg *arg) +msg_search_arg_context(struct index_search_context *ctx, + struct mail_search_arg *arg) { - enum message_search_flags flags = MESSAGE_SEARCH_FLAG_DTCASE; + enum message_search_flags flags = 0; if (arg->context == NULL) T_BEGIN { string_t *dtc = t_str_new(128); - if (uni_utf8_to_decomposed_titlecase(arg->value.str, - strlen(arg->value.str), - dtc) < 0) + if (ctx->mail_ctx.normalizer(arg->value.str, + strlen(arg->value.str), dtc) < 0) i_panic("search key not utf8: %s", arg->value.str); if (arg->type == SEARCH_BODY) @@ -413,8 +414,12 @@ /* we don't get here if arg is "", but dtc can be "" if it only contains characters that we need to ignore. handle those searches by returning them as non-matched. 
*/ - if (str_len(dtc) > 0) - arg->context = message_search_init(str_c(dtc), flags); + if (str_len(dtc) > 0) { + arg->context = + message_search_init(str_c(dtc), + ctx->mail_ctx.normalizer, + flags); + } } T_END; return arg->context; } @@ -499,7 +504,7 @@ hdr.middle_len = 0; block.hdr = &hdr; - msg_search_ctx = msg_search_arg_context(arg); + msg_search_ctx = msg_search_arg_context(ctx->index_ctx, arg); if (msg_search_ctx == NULL) return; @@ -604,7 +609,7 @@ return; } - msg_search_ctx = msg_search_arg_context(arg); + msg_search_ctx = msg_search_arg_context(ctx->index_ctx, arg); if (msg_search_ctx == NULL) { ARG_SET_RESULT(arg, 0); return; @@ -645,6 +650,7 @@ return -1; memset(&hdr_ctx, 0, sizeof(hdr_ctx)); + hdr_ctx.index_ctx = ctx; /* hdr_ctx.imail is different from imail for mails in virtual mailboxes */ hdr_ctx.imail = (struct index_mail *)mail_get_real_mail(ctx->cur_mail); @@ -1150,6 +1156,7 @@ ctx = i_new(struct index_search_context, 1); ctx->mail_ctx.transaction = t; + ctx->mail_ctx.normalizer = t->box->storage->user->default_normalizer; ctx->box = t->box; ctx->view = t->view; ctx->mail_ctx.args = args;
--- a/src/lib-storage/mail-storage-private.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-storage/mail-storage-private.h Sat Sep 15 03:12:20 2012 +0300 @@ -2,6 +2,7 @@ #define MAIL_STORAGE_PRIVATE_H #include "module-context.h" +#include "unichar.h" #include "file-lock.h" #include "mail-storage.h" #include "mail-storage-hooks.h" @@ -449,6 +450,7 @@ struct mail_search_sort_program *sort_program; enum mail_fetch_field wanted_fields; struct mailbox_header_lookup_ctx *wanted_headers; + normalizer_func_t *normalizer; /* if non-NULL, specifies that a search resulting is being updated. this can be used as a search optimization: if searched message
--- a/src/lib-storage/mail-user.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-storage/mail-user.c Sat Sep 15 03:12:20 2012 +0300 @@ -52,6 +52,7 @@ user->unexpanded_set = settings_dup(set_info, set, pool); user->set = settings_dup(set_info, set, pool); user->service = master_service_get_name(master_service); + user->default_normalizer = uni_utf8_to_decomposed_titlecase; /* check settings so that the duplicated structure will again contain the parsed fields */
--- a/src/lib-storage/mail-user.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib-storage/mail-user.h Sat Sep 15 03:12:20 2012 +0300 @@ -1,6 +1,7 @@ #ifndef MAIL_USER_H #define MAIL_USER_H +#include "unichar.h" #include "mail-storage-settings.h" struct module; @@ -38,6 +39,7 @@ ARRAY(const struct mail_storage_hooks *) hooks; struct mountpoint_list *mountpoints; + normalizer_func_t *default_normalizer; /* Module-specific contexts. See mail_storage_module_id. */ ARRAY(union mail_user_module_context *) module_contexts;
--- a/src/lib/unichar.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/lib/unichar.h Sat Sep 15 03:12:20 2012 +0300 @@ -27,6 +27,12 @@ typedef uint32_t unichar_t; ARRAY_DEFINE_TYPE(unichars, unichar_t); +/* Normalize UTF8 input and append it to output buffer. + Returns 0 if ok, -1 if input was invalid. Even if input was invalid, + as much as possible should be added to output. */ +typedef int normalizer_func_t(const void *input, size_t size, + buffer_t *output); + extern const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN]; extern const uint8_t *const uni_utf8_non1_bytes;
--- a/src/plugins/fts-squat/fts-backend-squat.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/plugins/fts-squat/fts-backend-squat.c Sat Sep 15 03:12:20 2012 +0300 @@ -374,8 +374,8 @@ i_array_init(&tmp_maybe_uids, 128); dtc = t_str_new(128); - if (uni_utf8_to_decomposed_titlecase(arg->value.str, - strlen(arg->value.str), dtc) < 0) + if (backend->backend.ns->user-> + default_normalizer(arg->value.str, strlen(arg->value.str), dtc) < 0) i_panic("squat: search key not utf8"); ret = squat_trie_lookup(backend->trie, str_c(dtc), squat_type, @@ -462,7 +462,7 @@ struct fts_backend fts_backend_squat = { .name = "squat", - .flags = FTS_BACKEND_FLAG_BUILD_DTCASE, + .flags = FTS_BACKEND_FLAG_NORMALIZE_INPUT, { fts_backend_squat_alloc,
--- a/src/plugins/fts/fts-api-private.h Sat Sep 15 03:09:57 2012 +0300 +++ b/src/plugins/fts/fts-api-private.h Sat Sep 15 03:12:20 2012 +0300 @@ -1,6 +1,7 @@ #ifndef FTS_API_PRIVATE_H #define FTS_API_PRIVATE_H +#include "unichar.h" #include "fts-api.h" struct mail_user; @@ -53,9 +54,9 @@ enum fts_backend_flags { /* Backend supports indexing binary MIME parts */ FTS_BACKEND_FLAG_BINARY_MIME_PARTS = 0x01, - /* Send built text to backend as decomposed titlecase rather than + /* Send built text to backend normalized rather than preserving original case */ - FTS_BACKEND_FLAG_BUILD_DTCASE = 0x02, + FTS_BACKEND_FLAG_NORMALIZE_INPUT = 0x02, /* Send only fully indexable words rather than randomly sized blocks */ FTS_BACKEND_FLAG_BUILD_FULL_WORDS = 0x04, /* Fuzzy search works */ @@ -74,6 +75,7 @@ struct fts_backend_update_context { struct fts_backend *backend; + normalizer_func_t *normalizer; struct mailbox *cur_box, *backend_box;
--- a/src/plugins/fts/fts-api.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/plugins/fts/fts-api.c Sat Sep 15 03:12:20 2012 +0300 @@ -110,10 +110,15 @@ struct fts_backend_update_context * fts_backend_update_init(struct fts_backend *backend) { + struct fts_backend_update_context *ctx; + i_assert(!backend->updating); backend->updating = TRUE; - return backend->v.update_init(backend); + ctx = backend->v.update_init(backend); + if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0) + ctx->normalizer = backend->ns->user->default_normalizer; + return ctx; } static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
--- a/src/plugins/fts/fts-build-mail.c Sat Sep 15 03:09:57 2012 +0300 +++ b/src/plugins/fts/fts-build-mail.c Sat Sep 15 03:12:20 2012 +0300 @@ -267,7 +267,6 @@ struct mail *mail) { struct fts_mail_build_context ctx; - enum message_decoder_flags decoder_flags = 0; struct istream *input; struct message_parser_ctx *parser; struct message_decoder_context *decoder; @@ -289,9 +288,7 @@ MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, 0); - if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_BUILD_DTCASE) != 0) - decoder_flags |= MESSAGE_DECODER_FLAG_DTCASE; - decoder = message_decoder_init(decoder_flags); + decoder = message_decoder_init(update_ctx->normalizer, 0); for (;;) { ret = message_parser_parse_next_block(parser, &raw_block); i_assert(ret != 0);