changeset 18150:3d9ec121dc81

lib-charset: Added CHARSET_MAX_PENDING_BUF_SIZE macro and asserts for it.
author Timo Sirainen <tss@iki.fi>
date Thu, 15 Jan 2015 01:05:36 +0200
parents 0e74934072e0
children e49a2e800650
files src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h
diffstat 3 files changed, 13 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-iconv.c	Thu Jan 15 01:05:36 2015 +0200
@@ -129,6 +129,7 @@
 	if (prev_invalid_pos != (size_t)-1)
 		result = CHARSET_RET_INVALID_INPUT;
 
+	i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 	*src_size = pos;
 	return result;
 }
--- a/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.c	Thu Jan 15 01:05:36 2015 +0200
@@ -94,6 +94,7 @@
 
 	uni_utf8_partial_strlen_n(src, *src_size, &pos);
 	if (pos < *src_size) {
+		i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE);
 		*src_size = pos;
 		res = CHARSET_RET_INCOMPLETE_INPUT;
 	}
--- a/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:13 2015 +0200
+++ b/src/lib-charset/charset-utf8.h	Thu Jan 15 01:05:36 2015 +0200
@@ -3,6 +3,11 @@
 
 #include "unichar.h"
 
+/* Max number of bytes that iconv can require for a single character.
+   UTF-8 takes at most 6 bytes per character. Other charsets have not been
+   verified, but 10 bytes is assumed to be more than enough for all of them. */
+#define CHARSET_MAX_PENDING_BUF_SIZE 10
+
 struct charset_translation;
 
 enum charset_result {
@@ -25,7 +30,12 @@
 bool charset_is_utf8(const char *charset) ATTR_PURE;
 
 /* Translate src to UTF-8. src_size is updated to contain the number of
-   characters actually translated from src. */
+   bytes actually translated from src. The src_size should never shrink by
+   more than CHARSET_MAX_PENDING_BUF_SIZE bytes.
+
+   If src contains invalid input, UNICODE_REPLACEMENT_CHAR is placed in such
+   positions and the invalid input is skipped over. The return value is
+   CHARSET_RET_INVALID_INPUT in that case. */
 enum charset_result
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest);