changeset 15053:c976a9c01613

Replaced "decomposed titlecase" conversions with a more generic normalizer function. Plugins can now change mail_user.default_normalizer. Specific searches can also use different normalizers by changing mail_search_context.normalizer.
author Timo Sirainen <tss@iki.fi>
date Sat, 15 Sep 2012 03:12:20 +0300
parents d5ebec837bfd
children 07ac1dbcc033
files src/doveadm/doveadm-mail-fetch.c src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h src/lib-imap/imap-base-subject.c src/lib-mail/message-decoder.c src/lib-mail/message-decoder.h src/lib-mail/message-header-decode.c src/lib-mail/message-header-decode.h src/lib-mail/message-search.c src/lib-mail/message-search.h src/lib-mail/test-message-decoder.c src/lib-mail/test-message-header-decode.c src/lib-storage/index/index-search.c src/lib-storage/mail-storage-private.h src/lib-storage/mail-user.c src/lib-storage/mail-user.h src/lib/unichar.h src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts/fts-api-private.h src/plugins/fts/fts-api.c src/plugins/fts/fts-build-mail.c
diffstat 22 files changed, 125 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/src/doveadm/doveadm-mail-fetch.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/doveadm/doveadm-mail-fetch.c	Sat Sep 15 03:12:20 2012 +0300
@@ -265,7 +265,7 @@
 	parser = message_parser_init(pool_datastack_create(), input,
 				     MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
 				     0);
-	decoder = message_decoder_init(0);
+	decoder = message_decoder_init(NULL, 0);
 
 	while ((ret = message_parser_parse_next_block(parser, &raw_block)) > 0) {
 		if (!message_decoder_decode_next_block(decoder, &raw_block,
--- a/src/lib-charset/charset-iconv.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-charset/charset-iconv.c	Sat Sep 15 03:12:20 2012 +0300
@@ -12,10 +12,10 @@
 
 struct charset_translation {
 	iconv_t cd;
-	enum charset_flags flags;
+	normalizer_func_t *normalizer;
 };
 
-int charset_to_utf8_begin(const char *charset, enum charset_flags flags,
+int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
 			  struct charset_translation **t_r)
 {
 	struct charset_translation *t;
@@ -31,7 +31,7 @@
 
 	t = i_new(struct charset_translation, 1);
 	t->cd = cd;
-	t->flags = flags;
+	t->normalizer = normalizer;
 	*t_r = t;
 	return 0;
 }
@@ -54,12 +54,12 @@
 }
 
 static int
-charset_append_utf8(const void *src, size_t src_size,
-		    buffer_t *dest, bool dtcase)
+charset_append_utf8(struct charset_translation *t,
+		    const void *src, size_t src_size, buffer_t *dest)
 {
-	if (dtcase)
-		return uni_utf8_to_decomposed_titlecase(src, src_size, dest);
-	if (!uni_utf8_get_valid_data(src, src_size, dest))
+	if (t->normalizer != NULL)
+		return t->normalizer(src, src_size, dest);
+	else if (!uni_utf8_get_valid_data(src, src_size, dest))
 		return -1;
 	else {
 		buffer_append(dest, src, src_size);
@@ -75,12 +75,11 @@
 	ICONV_CONST char *ic_srcbuf;
 	char tmpbuf[8192], *ic_destbuf;
 	size_t srcleft, destleft;
-	bool dtcase = (t->flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0;
 	bool ret = TRUE;
 
 	if (t->cd == (iconv_t)-1) {
 		/* input is already supposed to be UTF-8 */
-		if (charset_append_utf8(src, *src_size, dest, dtcase) < 0)
+		if (charset_append_utf8(t, src, *src_size, dest) < 0)
 			*result = CHARSET_RET_INVALID_INPUT;
 		else
 			*result = CHARSET_RET_OK;
@@ -110,8 +109,8 @@
 	/* we just converted data to UTF-8. it shouldn't be invalid, but
 	   Solaris iconv appears to pass invalid data through sometimes
 	   (e.g. 8 bit characters with UTF-7) */
-	if (charset_append_utf8(tmpbuf, sizeof(tmpbuf) - destleft,
-				dest, dtcase) < 0)
+	if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft,
+				dest) < 0)
 		*result = CHARSET_RET_INVALID_INPUT;
 	return ret;
 }
--- a/src/lib-charset/charset-utf8.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-charset/charset-utf8.c	Sat Sep 15 03:12:20 2012 +0300
@@ -16,14 +16,14 @@
 		strcasecmp(charset, "UTF8") == 0;
 }
 
-int charset_to_utf8_str(const char *charset, enum charset_flags flags,
+int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
 			const char *input, string_t *output,
 			enum charset_result *result_r)
 {
 	struct charset_translation *t;
 	size_t len = strlen(input);
 
-	if (charset_to_utf8_begin(charset, flags, &t) < 0)
+	if (charset_to_utf8_begin(charset, normalizer, &t) < 0)
 		return -1;
 
 	*result_r = charset_to_utf8(t, (const unsigned char *)input,
@@ -35,31 +35,31 @@
 #ifndef HAVE_ICONV
 
 struct charset_translation {
-	enum charset_flags flags;
+	normalizer_func_t *normalizer;
 };
 
-static struct charset_translation raw_translation = { 0 };
-static struct charset_translation tc_translation = {
-	CHARSET_FLAG_DECOMP_TITLECASE
-};
-
-int charset_to_utf8_begin(const char *charset, enum charset_flags flags,
+int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
 			  struct charset_translation **t_r)
 {
-	if (charset_is_utf8(charset)) {
-		if ((flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0)
-			*t_r = &tc_translation;
-		else
-			*t_r = &raw_translation;
-		return 0;
+	struct charset_translation *t;
+
+	if (!charset_is_utf8(charset)) {
+		/* no support for charsets that need translation */
+		return -1;
 	}
 
-	/* no support for charsets that need translation */
-	return -1;
+	t = i_new(struct charset_translation, 1);
+	t->normalizer = normalizer;
+	*t_r = t;
+	return 0;
 }
 
-void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED)
+void charset_to_utf8_end(struct charset_translation **_t)
 {
+	struct charset_translation *t = *_t;
+
+	*_t = NULL;
+	i_free(t);
 }
 
 void charset_to_utf8_reset(struct charset_translation *t ATTR_UNUSED)
@@ -70,11 +70,13 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if ((t->flags & CHARSET_FLAG_DECOMP_TITLECASE) == 0)
+	if (t->normalizer != NULL) {
+		if (t->normalizer(src, *src_size, dest) < 0)
+			return CHARSET_RET_INVALID_INPUT;
+	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
+		return CHARSET_RET_INVALID_INPUT;
+	} else {
 		buffer_append(dest, src, *src_size);
-	else {
-		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
-			return CHARSET_RET_INVALID_INPUT;
 	}
 	return CHARSET_RET_OK;
 }
--- a/src/lib-charset/charset-utf8.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-charset/charset-utf8.h	Sat Sep 15 03:12:20 2012 +0300
@@ -1,12 +1,9 @@
 #ifndef CHARSET_UTF8_H
 #define CHARSET_UTF8_H
 
-struct charset_translation;
+#include "unichar.h"
 
-enum charset_flags {
-	/* Translate the output to decomposed titlecase */
-	CHARSET_FLAG_DECOMP_TITLECASE	= 0x01
-};
+struct charset_translation;
 
 enum charset_result {
 	CHARSET_RET_OK = 1,
@@ -15,8 +12,9 @@
 };
 
 /* Begin translation to UTF-8. Returns -1 if charset is unknown. */
-int charset_to_utf8_begin(const char *charset, enum charset_flags flags,
-			  struct charset_translation **t_r);
+int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
+			  struct charset_translation **t_r)
+	ATTR_NULL(2);
 void charset_to_utf8_end(struct charset_translation **t);
 void charset_to_utf8_reset(struct charset_translation *t);
 
@@ -30,8 +28,8 @@
 		const unsigned char *src, size_t *src_size, buffer_t *dest);
 
 /* Translate a single string to UTF8. */
-int charset_to_utf8_str(const char *charset, enum charset_flags flags,
+int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
 			const char *input, string_t *output,
-			enum charset_result *result_r);
+			enum charset_result *result_r) ATTR_NULL(2);
 
 #endif
--- a/src/lib-imap/imap-base-subject.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-imap/imap-base-subject.c	Sat Sep 15 03:12:20 2012 +0300
@@ -210,7 +210,7 @@
 	   UTF-8.  Convert all tabs and continuations to space.
 	   Convert all multiple spaces to a single space. */
 	message_header_decode_utf8((const unsigned char *)subject, subject_len,
-				   buf, TRUE);
+				   buf, uni_utf8_to_decomposed_titlecase);
 	buffer_append_c(buf, '\0');
 
 	pack_whitespace(buf);
--- a/src/lib-mail/message-decoder.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-decoder.c	Sat Sep 15 03:12:20 2012 +0300
@@ -22,6 +22,7 @@
 
 struct message_decoder_context {
 	enum message_decoder_flags flags;
+	normalizer_func_t *normalizer;
 	struct message_part *prev_part;
 
 	struct message_header_line hdr;
@@ -46,12 +47,14 @@
 				 struct message_part *part);
 
 struct message_decoder_context *
-message_decoder_init(enum message_decoder_flags flags)
+message_decoder_init(normalizer_func_t *normalizer,
+		     enum message_decoder_flags flags)
 {
 	struct message_decoder_context *ctx;
 
 	ctx = i_new(struct message_decoder_context, 1);
 	ctx->flags = flags;
+	ctx->normalizer = normalizer;
 	ctx->buf = buffer_create_dynamic(default_pool, 8192);
 	ctx->buf2 = buffer_create_dynamic(default_pool, 8192);
 	ctx->encoding_buf = buffer_create_dynamic(default_pool, 128);
@@ -149,7 +152,6 @@
 				  struct message_header_line *hdr,
 				  struct message_block *output)
 {
-	bool dtcase = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0;
 	size_t value_len;
 
 	if (hdr->continues) {
@@ -168,12 +170,11 @@
 
 	buffer_set_used_size(ctx->buf, 0);
 	message_header_decode_utf8(hdr->full_value, hdr->full_value_len,
-				   ctx->buf, dtcase);
+				   ctx->buf, ctx->normalizer);
 	value_len = ctx->buf->used;
 
-	if (dtcase) {
-		(void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
-						       ctx->buf);
+	if (ctx->normalizer != NULL) {
+		(void)ctx->normalizer(hdr->name, hdr->name_len, ctx->buf);
 		buffer_append_c(ctx->buf, '\0');
 	} else {
 		if (!uni_utf8_get_valid_data((const unsigned char *)hdr->name,
@@ -229,8 +230,6 @@
 message_decode_body_init_charset(struct message_decoder_context *ctx,
 				 struct message_part *part)
 {
-	enum charset_flags flags;
-
 	ctx->binary_input = ctx->content_charset == NULL &&
 		(ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 &&
 		(part->flags & (MESSAGE_PART_FLAG_TEXT |
@@ -249,12 +248,10 @@
 		charset_to_utf8_end(&ctx->charset_trans);
 	i_free_and_null(ctx->charset_trans_charset);
 
-	flags = (ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0 ?
-		CHARSET_FLAG_DECOMP_TITLECASE : 0;
 	ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ?
 					      ctx->content_charset : "UTF-8");
-	if (charset_to_utf8_begin(ctx->charset_trans_charset,
-				  flags, &ctx->charset_trans) < 0)
+	if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer,
+				  &ctx->charset_trans) < 0)
 		ctx->charset_trans = NULL;
 }
 
@@ -331,9 +328,8 @@
 		output->size = size;
 	} else if (ctx->charset_utf8) {
 		buffer_set_used_size(ctx->buf2, 0);
-		if ((ctx->flags & MESSAGE_DECODER_FLAG_DTCASE) != 0) {
-			(void)uni_utf8_to_decomposed_titlecase(data, size,
-							       ctx->buf2);
+		if (ctx->normalizer != NULL) {
+			(void)ctx->normalizer(data, size, ctx->buf2);
 			output->data = ctx->buf2->data;
 			output->size = ctx->buf2->used;
 		} else if (uni_utf8_get_valid_data(data, size, ctx->buf2)) {
--- a/src/lib-mail/message-decoder.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-decoder.h	Sat Sep 15 03:12:20 2012 +0300
@@ -1,6 +1,8 @@
 #ifndef MESSAGE_DECODER_H
 #define MESSAGE_DECODER_H
 
+#include "unichar.h"
+
 struct message_header_line;
 
 enum message_cte {
@@ -12,9 +14,6 @@
 };
 
 enum message_decoder_flags {
-	/* Return all headers and parts through
-	   uni_utf8_to_decomposed_titlecase() */
-	MESSAGE_DECODER_FLAG_DTCASE		= 0x01,
 	/* Return binary MIME parts as-is without any conversion. */
 	MESSAGE_DECODER_FLAG_RETURN_BINARY	= 0x02
 };
@@ -24,7 +23,8 @@
 /* Decode message's contents as UTF-8, both the headers and the MIME bodies.
    The bodies are decoded from quoted-printable and base64 formats if needed. */
 struct message_decoder_context *
-message_decoder_init(enum message_decoder_flags flags);
+message_decoder_init(normalizer_func_t *normalizer,
+		     enum message_decoder_flags flags);
 void message_decoder_deinit(struct message_decoder_context **ctx);
 
 /* Change the MESSAGE_DECODER_FLAG_RETURN_BINARY flag */
--- a/src/lib-mail/message-header-decode.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-header-decode.c	Sat Sep 15 03:12:20 2012 +0300
@@ -135,8 +135,8 @@
 
 struct decode_utf8_context {
 	buffer_t *dest;
+	normalizer_func_t *normalizer;
 	unsigned int changed:1;
-	unsigned int dtcase:1;
 };
 
 static bool
@@ -145,13 +145,11 @@
 {
 	struct decode_utf8_context *ctx = context;
 	struct charset_translation *t;
-	enum charset_flags flags;
 
 	if (charset == NULL || charset_is_utf8(charset)) {
 		/* ASCII / UTF-8 */
-		if (ctx->dtcase) {
-			(void)uni_utf8_to_decomposed_titlecase(data, size,
-							       ctx->dest);
+		if (ctx->normalizer != NULL) {
+			(void)ctx->normalizer(data, size, ctx->dest);
 		} else {
 			if (uni_utf8_get_valid_data(data, size, ctx->dest))
 				buffer_append(ctx->dest, data, size);
@@ -159,8 +157,7 @@
 		return TRUE;
 	}
 
-	flags = ctx->dtcase ? CHARSET_FLAG_DECOMP_TITLECASE : 0;
-	if (charset_to_utf8_begin(charset, flags, &t) < 0) {
+	if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) {
 		/* data probably still contains some valid ASCII characters.
 		   append them. */
 		if (uni_utf8_get_valid_data(data, size, ctx->dest))
@@ -175,12 +172,12 @@
 }
 
 void message_header_decode_utf8(const unsigned char *data, size_t size,
-				buffer_t *dest, bool dtcase)
+				buffer_t *dest, normalizer_func_t *normalizer)
 {
 	struct decode_utf8_context ctx;
 
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.dest = dest;
-	ctx.dtcase = dtcase;
+	ctx.normalizer = normalizer;
 	message_header_decode(data, size, decode_utf8_callback, &ctx);
 }
--- a/src/lib-mail/message-header-decode.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-header-decode.h	Sat Sep 15 03:12:20 2012 +0300
@@ -1,6 +1,8 @@
 #ifndef MESSAGE_HEADER_DECODE_H
 #define MESSAGE_HEADER_DECODE_H
 
+#include "unichar.h"
+
 /* Return FALSE if you wish to stop decoding. charset is NULL when it's not
    RFC2047-encoded. */
 typedef bool message_header_decode_callback_t(const unsigned char *data,
@@ -13,9 +15,8 @@
 			   message_header_decode_callback_t *callback,
 			   void *context);
 
-/* Append decoded RFC2047 header as UTF-8 to given buffer. If dtcase=TRUE,
-   the header is appended through uni_utf8_to_decomposed_titlecase(). */
+/* Append decoded RFC2047 header as UTF-8 to given buffer. */
 void message_header_decode_utf8(const unsigned char *data, size_t size,
-				buffer_t *dest, bool dtcase);
+				buffer_t *dest, normalizer_func_t *normalizer);
 
 #endif
--- a/src/lib-mail/message-search.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-search.c	Sat Sep 15 03:12:20 2012 +0300
@@ -12,6 +12,8 @@
 
 struct message_search_context {
 	enum message_search_flags flags;
+	normalizer_func_t *normalizer;
+
 	struct str_find_context *str_find_ctx;
 	struct message_part *prev_part;
 
@@ -20,21 +22,18 @@
 };
 
 struct message_search_context *
-message_search_init(const char *key_utf8,
+message_search_init(const char *normalized_key_utf8,
+		    normalizer_func_t *normalizer,
 		    enum message_search_flags flags)
 {
-	enum message_decoder_flags decoder_flags = 0;
 	struct message_search_context *ctx;
 
-	i_assert(*key_utf8 != '\0');
-
-	if ((flags & MESSAGE_SEARCH_FLAG_DTCASE) != 0)
-		decoder_flags |= MESSAGE_DECODER_FLAG_DTCASE;
+	i_assert(*normalized_key_utf8 != '\0');
 
 	ctx = i_new(struct message_search_context, 1);
 	ctx->flags = flags;
-	ctx->decoder = message_decoder_init(decoder_flags);
-	ctx->str_find_ctx = str_find_init(default_pool, key_utf8);
+	ctx->decoder = message_decoder_init(normalizer, 0);
+	ctx->str_find_ctx = str_find_init(default_pool, normalized_key_utf8);
 	return ctx;
 }
 
--- a/src/lib-mail/message-search.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/message-search.h	Sat Sep 15 03:12:20 2012 +0300
@@ -7,15 +7,13 @@
 
 enum message_search_flags {
 	/* Skip the main header and all the MIME headers. */
-	MESSAGE_SEARCH_FLAG_SKIP_HEADERS	= 0x01,
-	/* Search with decomposed titlecase (instead of exact case matching).
-	   The search key must be given with dtcase also. */
-	MESSAGE_SEARCH_FLAG_DTCASE		= 0x02
+	MESSAGE_SEARCH_FLAG_SKIP_HEADERS	= 0x01
 };
 
 /* The key must be given in UTF-8 charset */
 struct message_search_context *
-message_search_init(const char *key_utf8,
+message_search_init(const char *normalized_key_utf8,
+		    normalizer_func_t *normalizer,
 		    enum message_search_flags flags);
 void message_search_deinit(struct message_search_context **ctx);
 
--- a/src/lib-mail/test-message-decoder.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/test-message-decoder.c	Sat Sep 15 03:12:20 2012 +0300
@@ -10,7 +10,8 @@
 #include "test-common.h"
 
 void message_header_decode_utf8(const unsigned char *data, size_t size,
-				buffer_t *dest, bool dtcase ATTR_UNUSED)
+				buffer_t *dest,
+				normalizer_func_t *normalizer ATTR_UNUSED)
 {
 	buffer_append(dest, data, size);
 }
@@ -25,7 +26,7 @@
 }
 
 int charset_to_utf8_begin(const char *charset ATTR_UNUSED,
-			  enum charset_flags flags ATTR_UNUSED,
+			  normalizer_func_t *normalizer ATTR_UNUSED,
 			  struct charset_translation **t_r)
 {
 	*t_r = NULL;
@@ -56,7 +57,7 @@
 	memset(&output, 0, sizeof(output));
 	input.part = &part;
 
-	ctx = message_decoder_init(0);
+	ctx = message_decoder_init(NULL, 0);
 
 	memset(&hdr, 0, sizeof(hdr));
 	hdr.name = "Content-Transfer-Encoding";
--- a/src/lib-mail/test-message-header-decode.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-mail/test-message-header-decode.c	Sat Sep 15 03:12:20 2012 +0300
@@ -10,7 +10,7 @@
 bool charset_is_utf8(const char *charset ATTR_UNUSED) { return TRUE; }
 
 int charset_to_utf8_begin(const char *charset ATTR_UNUSED,
-			  enum charset_flags flags ATTR_UNUSED,
+			  normalizer_func_t *normalizer ATTR_UNUSED,
 			  struct charset_translation **t_r ATTR_UNUSED) { return 0; }
 void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED) {}
 
--- a/src/lib-storage/index/index-search.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-storage/index/index-search.c	Sat Sep 15 03:12:20 2012 +0300
@@ -38,6 +38,7 @@
 #define SEARCH_RECALC_MIN_USECS 50000
 
 struct search_header_context {
+        struct index_search_context *index_ctx;
         struct index_mail *imail;
 	struct mail_search_arg *args;
 
@@ -396,16 +397,16 @@
 }
 
 static struct message_search_context *
-msg_search_arg_context(struct mail_search_arg *arg)
+msg_search_arg_context(struct index_search_context *ctx,
+		       struct mail_search_arg *arg)
 {
-	enum message_search_flags flags = MESSAGE_SEARCH_FLAG_DTCASE;
+	enum message_search_flags flags = 0;
 
 	if (arg->context == NULL) T_BEGIN {
 		string_t *dtc = t_str_new(128);
 
-		if (uni_utf8_to_decomposed_titlecase(arg->value.str,
-						     strlen(arg->value.str),
-						     dtc) < 0)
+		if (ctx->mail_ctx.normalizer(arg->value.str,
+					     strlen(arg->value.str), dtc) < 0)
 			i_panic("search key not utf8: %s", arg->value.str);
 
 		if (arg->type == SEARCH_BODY)
@@ -413,8 +414,12 @@
 		/* we don't get here if arg is "", but dtc can be "" if it
 		   only contains characters that we need to ignore. handle
 		   those searches by returning them as non-matched. */
-		if (str_len(dtc) > 0)
-			arg->context = message_search_init(str_c(dtc), flags);
+		if (str_len(dtc) > 0) {
+			arg->context =
+				message_search_init(str_c(dtc),
+						    ctx->mail_ctx.normalizer,
+						    flags);
+		}
 	} T_END;
 	return arg->context;
 }
@@ -499,7 +504,7 @@
 	hdr.middle_len = 0;
 	block.hdr = &hdr;
 
-	msg_search_ctx = msg_search_arg_context(arg);
+	msg_search_ctx = msg_search_arg_context(ctx->index_ctx, arg);
 	if (msg_search_ctx == NULL)
 		return;
 
@@ -604,7 +609,7 @@
 		return;
 	}
 
-	msg_search_ctx = msg_search_arg_context(arg);
+	msg_search_ctx = msg_search_arg_context(ctx->index_ctx, arg);
 	if (msg_search_ctx == NULL) {
 		ARG_SET_RESULT(arg, 0);
 		return;
@@ -645,6 +650,7 @@
 		return -1;
 
 	memset(&hdr_ctx, 0, sizeof(hdr_ctx));
+	hdr_ctx.index_ctx = ctx;
 	/* hdr_ctx.imail is different from imail for mails in
 	   virtual mailboxes */
 	hdr_ctx.imail = (struct index_mail *)mail_get_real_mail(ctx->cur_mail);
@@ -1150,6 +1156,7 @@
 
 	ctx = i_new(struct index_search_context, 1);
 	ctx->mail_ctx.transaction = t;
+	ctx->mail_ctx.normalizer = t->box->storage->user->default_normalizer;
 	ctx->box = t->box;
 	ctx->view = t->view;
 	ctx->mail_ctx.args = args;
--- a/src/lib-storage/mail-storage-private.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-storage/mail-storage-private.h	Sat Sep 15 03:12:20 2012 +0300
@@ -2,6 +2,7 @@
 #define MAIL_STORAGE_PRIVATE_H
 
 #include "module-context.h"
+#include "unichar.h"
 #include "file-lock.h"
 #include "mail-storage.h"
 #include "mail-storage-hooks.h"
@@ -449,6 +450,7 @@
 	struct mail_search_sort_program *sort_program;
 	enum mail_fetch_field wanted_fields;
 	struct mailbox_header_lookup_ctx *wanted_headers;
+	normalizer_func_t *normalizer;
 
 	/* if non-NULL, specifies that a search resulting is being updated.
 	   this can be used as a search optimization: if searched message
--- a/src/lib-storage/mail-user.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-storage/mail-user.c	Sat Sep 15 03:12:20 2012 +0300
@@ -52,6 +52,7 @@
 	user->unexpanded_set = settings_dup(set_info, set, pool);
 	user->set = settings_dup(set_info, set, pool);
 	user->service = master_service_get_name(master_service);
+	user->default_normalizer = uni_utf8_to_decomposed_titlecase;
 
 	/* check settings so that the duplicated structure will again
 	   contain the parsed fields */
--- a/src/lib-storage/mail-user.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-storage/mail-user.h	Sat Sep 15 03:12:20 2012 +0300
@@ -1,6 +1,7 @@
 #ifndef MAIL_USER_H
 #define MAIL_USER_H
 
+#include "unichar.h"
 #include "mail-storage-settings.h"
 
 struct module;
@@ -38,6 +39,7 @@
 	ARRAY(const struct mail_storage_hooks *) hooks;
 
 	struct mountpoint_list *mountpoints;
+	normalizer_func_t *default_normalizer;
 
 	/* Module-specific contexts. See mail_storage_module_id. */
 	ARRAY(union mail_user_module_context *) module_contexts;
--- a/src/lib/unichar.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib/unichar.h	Sat Sep 15 03:12:20 2012 +0300
@@ -27,6 +27,12 @@
 typedef uint32_t unichar_t;
 ARRAY_DEFINE_TYPE(unichars, unichar_t);
 
+/* Normalize UTF8 input and append it to output buffer.
+   Returns 0 if ok, -1 if input was invalid. Even if input was invalid,
+   as much as possible should be added to output. */
+typedef int normalizer_func_t(const void *input, size_t size,
+			      buffer_t *output);
+
 extern const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN];
 extern const uint8_t *const uni_utf8_non1_bytes;
 
--- a/src/plugins/fts-squat/fts-backend-squat.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Sat Sep 15 03:12:20 2012 +0300
@@ -374,8 +374,8 @@
 	i_array_init(&tmp_maybe_uids, 128);
 
 	dtc = t_str_new(128);
-	if (uni_utf8_to_decomposed_titlecase(arg->value.str,
-					     strlen(arg->value.str), dtc) < 0)
+	if (backend->backend.ns->user->
+	    default_normalizer(arg->value.str, strlen(arg->value.str), dtc) < 0)
 		i_panic("squat: search key not utf8");
 
 	ret = squat_trie_lookup(backend->trie, str_c(dtc), squat_type,
@@ -462,7 +462,7 @@
 
 struct fts_backend fts_backend_squat = {
 	.name = "squat",
-	.flags = FTS_BACKEND_FLAG_BUILD_DTCASE,
+	.flags = FTS_BACKEND_FLAG_NORMALIZE_INPUT,
 
 	{
 		fts_backend_squat_alloc,
--- a/src/plugins/fts/fts-api-private.h	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/plugins/fts/fts-api-private.h	Sat Sep 15 03:12:20 2012 +0300
@@ -1,6 +1,7 @@
 #ifndef FTS_API_PRIVATE_H
 #define FTS_API_PRIVATE_H
 
+#include "unichar.h"
 #include "fts-api.h"
 
 struct mail_user;
@@ -53,9 +54,9 @@
 enum fts_backend_flags {
 	/* Backend supports indexing binary MIME parts */
 	FTS_BACKEND_FLAG_BINARY_MIME_PARTS	= 0x01,
-	/* Send built text to backend as decomposed titlecase rather than
+	/* Send built text to backend normalized rather than
 	   preserving original case */
-	FTS_BACKEND_FLAG_BUILD_DTCASE		= 0x02,
+	FTS_BACKEND_FLAG_NORMALIZE_INPUT	= 0x02,
 	/* Send only fully indexable words rather than randomly sized blocks */
 	FTS_BACKEND_FLAG_BUILD_FULL_WORDS	= 0x04,
 	/* Fuzzy search works */
@@ -74,6 +75,7 @@
 
 struct fts_backend_update_context {
 	struct fts_backend *backend;
+	normalizer_func_t *normalizer;
 
 	struct mailbox *cur_box, *backend_box;
 
--- a/src/plugins/fts/fts-api.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/plugins/fts/fts-api.c	Sat Sep 15 03:12:20 2012 +0300
@@ -110,10 +110,15 @@
 struct fts_backend_update_context *
 fts_backend_update_init(struct fts_backend *backend)
 {
+	struct fts_backend_update_context *ctx;
+
 	i_assert(!backend->updating);
 
 	backend->updating = TRUE;
-	return backend->v.update_init(backend);
+	ctx = backend->v.update_init(backend);
+	if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0)
+		ctx->normalizer = backend->ns->user->default_normalizer;
+	return ctx;
 }
 
 static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
--- a/src/plugins/fts/fts-build-mail.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/plugins/fts/fts-build-mail.c	Sat Sep 15 03:12:20 2012 +0300
@@ -267,7 +267,6 @@
 		    struct mail *mail)
 {
 	struct fts_mail_build_context ctx;
-	enum message_decoder_flags decoder_flags = 0;
 	struct istream *input;
 	struct message_parser_ctx *parser;
 	struct message_decoder_context *decoder;
@@ -289,9 +288,7 @@
 				     MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
 				     0);
 
-	if ((update_ctx->backend->flags & FTS_BACKEND_FLAG_BUILD_DTCASE) != 0)
-		decoder_flags |= MESSAGE_DECODER_FLAG_DTCASE;
-	decoder = message_decoder_init(decoder_flags);
+	decoder = message_decoder_init(update_ctx->normalizer, 0);
 	for (;;) {
 		ret = message_parser_parse_next_block(parser, &raw_block);
 		i_assert(ret != 0);