Mercurial > dovecot > core-2.2
diff src/lib-charset/charset-utf8.c @ 15053:c976a9c01613
Replaced "decomposed titlecase" conversions with a more generic normalizer function.
Plugins can now change mail_user.default_normalizer. Specific searches can
also use different normalizers by changing mail_search_context.normalizer.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 15 Sep 2012 03:12:20 +0300 |
parents | ba770cba5598 |
children | 90710c6c3beb |
line wrap: on
line diff
--- a/src/lib-charset/charset-utf8.c Sat Sep 15 03:09:57 2012 +0300
+++ b/src/lib-charset/charset-utf8.c Sat Sep 15 03:12:20 2012 +0300
@@ -16,14 +16,14 @@
 		strcasecmp(charset, "UTF8") == 0;
 }
 
-int charset_to_utf8_str(const char *charset, enum charset_flags flags,
+int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
 			const char *input, string_t *output,
 			enum charset_result *result_r)
 {
 	struct charset_translation *t;
 	size_t len = strlen(input);
 
-	if (charset_to_utf8_begin(charset, flags, &t) < 0)
+	if (charset_to_utf8_begin(charset, normalizer, &t) < 0)
 		return -1;
 
 	*result_r = charset_to_utf8(t, (const unsigned char *)input,
@@ -35,31 +35,31 @@
 #ifndef HAVE_ICONV
 
 struct charset_translation {
-	enum charset_flags flags;
+	normalizer_func_t *normalizer;
 };
 
-static struct charset_translation raw_translation = { 0 };
-static struct charset_translation tc_translation = {
-	CHARSET_FLAG_DECOMP_TITLECASE
-};
-
-int charset_to_utf8_begin(const char *charset, enum charset_flags flags,
+int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
 			  struct charset_translation **t_r)
 {
-	if (charset_is_utf8(charset)) {
-		if ((flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0)
-			*t_r = &tc_translation;
-		else
-			*t_r = &raw_translation;
-		return 0;
+	struct charset_translation *t;
+
+	if (!charset_is_utf8(charset)) {
+		/* no support for charsets that need translation */
+		return -1;
 	}
 
-	/* no support for charsets that need translation */
-	return -1;
+	t = i_new(struct charset_translation, 1);
+	t->normalizer = normalizer;
+	*t_r = t;
+	return 0;
 }
 
-void charset_to_utf8_end(struct charset_translation **t ATTR_UNUSED)
+void charset_to_utf8_end(struct charset_translation **_t)
 {
+	struct charset_translation *t = *_t;
+
+	*_t = NULL;
+	i_free(t);
 }
 
 void charset_to_utf8_reset(struct charset_translation *t ATTR_UNUSED)
@@ -70,11 +70,13 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if ((t->flags & CHARSET_FLAG_DECOMP_TITLECASE) == 0)
+	if (t->normalizer != NULL) {
+		if (t->normalizer(src, *src_size, dest) < 0)
+			return CHARSET_RET_INVALID_INPUT;
+	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
+		return CHARSET_RET_INVALID_INPUT;
+	} else {
 		buffer_append(dest, src, *src_size);
-	else {
-		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
-			return CHARSET_RET_INVALID_INPUT;
 	}
 	return CHARSET_RET_OK;
 }