diff src/lib-charset/charset-utf8.c @ 15053:c976a9c01613

Replaced "decomposed titlecase" conversions with a more generic normalizer function. Plugins can now change mail_user.default_normalizer. Specific searches can also use different normalizers by changing mail_search_context.normalizer.
author Timo Sirainen <tss@iki.fi>
date Sat, 15 Sep 2012 03:12:20 +0300
parents ba770cba5598
children 90710c6c3beb
line wrap: on
line diff
--- a/src/lib-charset/charset-utf8.c	Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-charset/charset-utf8.c	Sat Sep 15 03:12:20 2012 +0300
@@ -16,14 +16,14 @@
 		strcasecmp(charset, "UTF8") == 0;
 }
 
-int charset_to_utf8_str(const char *charset, enum charset_flags flags,
+int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
 			const char *input, string_t *output,
 			enum charset_result *result_r)
 {
 	struct charset_translation *t;
 	size_t len = strlen(input);
 
-	if (charset_to_utf8_begin(charset, flags, &t) < 0)
+	if (charset_to_utf8_begin(charset, normalizer, &t) < 0)
 		return -1;
 
 	*result_r = charset_to_utf8(t, (const unsigned char *)input,
@@ -35,31 +35,31 @@
 #ifndef HAVE_ICONV
 
 struct charset_translation {
-	enum charset_flags flags;
+	normalizer_func_t *normalizer;
 };
 
-static struct charset_translation raw_translation = { 0 };
-static struct charset_translation tc_translation = {
-	CHARSET_FLAG_DECOMP_TITLECASE
-};
-
-int charset_to_utf8_begin(const char *charset, enum charset_flags flags,
+int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
 			  struct charset_translation **t_r)
 {
-	if (charset_is_utf8(charset)) {
-		if ((flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0)
-			*t_r = &tc_translation;
-		else
-			*t_r = &raw_translation;
-		return 0;
+	struct charset_translation *t;
+
+	if (!charset_is_utf8(charset)) {
+		/* no support for charsets that need translation */
+		return -1;
 	}
 
-	/* no support for charsets that need translation */
-	return -1;
+	t = i_new(struct charset_translation, 1);
+	t->normalizer = normalizer;
+	*t_r = t;
+	return 0;
 }
 
-void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED)
+void charset_to_utf8_end(struct charset_translation **_t)
 {
+	struct charset_translation *t = *_t;
+
+	*_t = NULL;
+	i_free(t);
 }
 
 void charset_to_utf8_reset(struct charset_translation *t ATTR_UNUSED)
@@ -70,11 +70,13 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if ((t->flags & CHARSET_FLAG_DECOMP_TITLECASE) == 0)
+	if (t->normalizer != NULL) {
+		if (t->normalizer(src, *src_size, dest) < 0)
+			return CHARSET_RET_INVALID_INPUT;
+	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
+		return CHARSET_RET_INVALID_INPUT;
+	} else {
 		buffer_append(dest, src, *src_size);
-	else {
-		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
-			return CHARSET_RET_INVALID_INPUT;
 	}
 	return CHARSET_RET_OK;
 }