Mercurial > dovecot > core-2.2
changeset 18144:7459c0891a85
lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Instead the incomplete input was just being modified into broken output.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 10 Jan 2015 04:25:21 +0200 |
parents | 55184e2a689f |
children | f191dbcaec5f |
files | src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h |
diffstat | 3 files changed, 31 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/lib-charset/charset-iconv.c	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-iconv.c	Sat Jan 10 04:25:21 2015 +0200
@@ -53,20 +53,6 @@
 	(void)iconv(t->cd, NULL, NULL, NULL, NULL);
 }
 
-static int
-charset_append_utf8(struct charset_translation *t,
-		    const void *src, size_t src_size, buffer_t *dest)
-{
-	if (t->normalizer != NULL)
-		return t->normalizer(src, src_size, dest);
-	else if (!uni_utf8_get_valid_data(src, src_size, dest))
-		return -1;
-	else {
-		buffer_append(dest, src, src_size);
-		return 0;
-	}
-}
-
 static bool
 charset_to_utf8_try(struct charset_translation *t,
 		    const unsigned char *src, size_t *src_size, buffer_t *dest,
@@ -74,15 +60,12 @@
 {
 	ICONV_CONST char *ic_srcbuf;
 	char tmpbuf[8192], *ic_destbuf;
-	size_t srcleft, destleft;
+	size_t srcleft, destleft, tmpbuf_used;
 	bool ret = TRUE;
 
 	if (t->cd == (iconv_t)-1) {
 		/* input is already supposed to be UTF-8 */
-		if (charset_append_utf8(t, src, *src_size, dest) < 0)
-			*result = CHARSET_RET_INVALID_INPUT;
-		else
-			*result = CHARSET_RET_OK;
+		*result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
 		return TRUE;
 	}
 	destleft = sizeof(tmpbuf);
@@ -109,8 +92,9 @@
 	/* we just converted data to UTF-8. it shouldn't be invalid, but
 	   Solaris iconv appears to pass invalid data through sometimes
 	   (e.g. 8 bit characters with UTF-7) */
-	if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft,
-				dest) < 0)
+	tmpbuf_used = sizeof(tmpbuf) - destleft;
+	if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
+				 &tmpbuf_used, dest) != CHARSET_RET_OK)
 		*result = CHARSET_RET_INVALID_INPUT;
 	return ret;
 }
```
```diff
--- a/src/lib-charset/charset-utf8.c	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.c	Sat Jan 10 04:25:21 2015 +0200
@@ -70,15 +70,31 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if (t->normalizer != NULL) {
-		if (t->normalizer(src, *src_size, dest) < 0)
+	return charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+}
+
+#endif
+
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+		     const unsigned char *src, size_t *src_size, buffer_t *dest)
+{
+	enum charset_result res = CHARSET_RET_OK;
+	size_t pos;
+
+	uni_utf8_partial_strlen_n(src, *src_size, &pos);
+	if (pos < *src_size) {
+		*src_size = pos;
+		res = CHARSET_RET_INCOMPLETE_INPUT;
+	}
+
+	if (normalizer != NULL) {
+		if (normalizer(src, *src_size, dest) < 0)
 			return CHARSET_RET_INVALID_INPUT;
 	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
 		return CHARSET_RET_INVALID_INPUT;
 	} else {
 		buffer_append(dest, src, *src_size);
 	}
-	return CHARSET_RET_OK;
+	return res;
 }
-
-#endif
```
```diff
--- a/src/lib-charset/charset-utf8.h	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.h	Sat Jan 10 04:25:21 2015 +0200
@@ -32,4 +32,9 @@
 			const char *input, string_t *output,
 			enum charset_result *result_r) ATTR_NULL(2);
 
+/* INTERNAL: */
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+		     const unsigned char *src, size_t *src_size, buffer_t *dest);
+
 #endif
```