Mercurial > dovecot > core-2.2
changeset 18150:3d9ec121dc81
lib-charset: Added CHARSET_MAX_PENDING_BUF_SIZE macro and asserts for it.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Thu, 15 Jan 2015 01:05:36 +0200 |
parents | 0e74934072e0 |
children | e49a2e800650 |
files | src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h |
diffstat | 3 files changed, 13 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:36 2015 +0200
@@ -129,6 +129,7 @@
 	if (prev_invalid_pos != (size_t)-1)
 		result = CHARSET_RET_INVALID_INPUT;
 
+	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 	*src_size = pos;
 	return result;
 }
--- a/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:36 2015 +0200
@@ -94,6 +94,7 @@
 
 	uni_utf8_partial_strlen_n(src, *src_size, &pos);
 	if (pos < *src_size) {
+		i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 		*src_size = pos;
 		res = CHARSET_RET_INCOMPLETE_INPUT;
 	}
--- a/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:36 2015 +0200
@@ -3,6 +3,11 @@
 
 #include "unichar.h"
 
+/* Max number of bytes that iconv can require for a single character.
+   UTF-8 takes max 6 bytes per character. Not sure about others, but I'd think
+   10 is more than enough for everyone.. */
+#define CHARSET_MAX_PENDING_BUF_SIZE 10
+
 struct charset_translation;
 
 enum charset_result {
@@ -25,7 +30,12 @@
 bool charset_is_utf8(const char *charset) ATTR_PURE;
 
 /* Translate src to UTF-8. src_size is updated to contain the number of
-   characters actually translated from src. */
+   characters actually translated from src. The src_size should never shrink
+   more than CHARSET_MAX_PENDING_BUF_SIZE bytes.
+
+   If src contains invalid input, UNICODE_REPLACEMENT_CHAR is placed in such
+   positions and the invalid input is skipped over. Return value is also
+   CHARSET_RET_INCOMPLETE_INPUT in that case. */
 enum charset_result
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest);