changeset 18144:7459c0891a85

lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT. Instead, the incomplete input was just being modified into broken output.
author Timo Sirainen <tss@iki.fi>
date Sat, 10 Jan 2015 04:25:21 +0200
parents 55184e2a689f
children f191dbcaec5f
files src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h
diffstat 3 files changed, 31 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-iconv.c	Sat Jan 10 04:25:21 2015 +0200
@@ -53,20 +53,6 @@
 		(void)iconv(t->cd, NULL, NULL, NULL, NULL);
 }
 
-static int
-charset_append_utf8(struct charset_translation *t,
-		    const void *src, size_t src_size, buffer_t *dest)
-{
-	if (t->normalizer != NULL)
-		return t->normalizer(src, src_size, dest);
-	else if (!uni_utf8_get_valid_data(src, src_size, dest))
-		return -1;
-	else {
-		buffer_append(dest, src, src_size);
-		return 0;
-	}
-}
-
 static bool
 charset_to_utf8_try(struct charset_translation *t,
 		    const unsigned char *src, size_t *src_size, buffer_t *dest,
@@ -74,15 +60,12 @@
 {
 	ICONV_CONST char *ic_srcbuf;
 	char tmpbuf[8192], *ic_destbuf;
-	size_t srcleft, destleft;
+	size_t srcleft, destleft, tmpbuf_used;
 	bool ret = TRUE;
 
 	if (t->cd == (iconv_t)-1) {
 		/* input is already supposed to be UTF-8 */
-		if (charset_append_utf8(t, src, *src_size, dest) < 0)
-			*result = CHARSET_RET_INVALID_INPUT;
-		else
-			*result = CHARSET_RET_OK;
+		*result = charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
 		return TRUE;
 	}
 	destleft = sizeof(tmpbuf);
@@ -109,8 +92,9 @@
 	/* we just converted data to UTF-8. it shouldn't be invalid, but
 	   Solaris iconv appears to pass invalid data through sometimes
 	   (e.g. 8 bit characters with UTF-7) */
-	if (charset_append_utf8(t, tmpbuf, sizeof(tmpbuf) - destleft,
-				dest) < 0)
+	tmpbuf_used = sizeof(tmpbuf) - destleft;
+	if (charset_utf8_to_utf8(t->normalizer, (void *)tmpbuf,
+				 &tmpbuf_used, dest) != CHARSET_RET_OK)
 		*result = CHARSET_RET_INVALID_INPUT;
 	return ret;
 }
--- a/src/lib-charset/charset-utf8.c	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.c	Sat Jan 10 04:25:21 2015 +0200
@@ -70,15 +70,31 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if (t->normalizer != NULL) {
-		if (t->normalizer(src, *src_size, dest) < 0)
+	return charset_utf8_to_utf8(t->normalizer, src, src_size, dest);
+}
+
+#endif
+
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+		     const unsigned char *src, size_t *src_size, buffer_t *dest)
+{
+	enum charset_result res = CHARSET_RET_OK;
+	size_t pos;
+
+	uni_utf8_partial_strlen_n(src, *src_size, &pos);
+	if (pos < *src_size) {
+		*src_size = pos;
+		res = CHARSET_RET_INCOMPLETE_INPUT;
+	}
+
+	if (normalizer != NULL) {
+		if (normalizer(src, *src_size, dest) < 0)
 			return CHARSET_RET_INVALID_INPUT;
 	} else if (!uni_utf8_get_valid_data(src, *src_size, dest)) {
 		return CHARSET_RET_INVALID_INPUT;
 	} else {
 		buffer_append(dest, src, *src_size);
 	}
-	return CHARSET_RET_OK;
+	return res;
 }
-
-#endif
--- a/src/lib-charset/charset-utf8.h	Thu Jan 08 23:07:54 2015 +0200
+++ b/src/lib-charset/charset-utf8.h	Sat Jan 10 04:25:21 2015 +0200
@@ -32,4 +32,9 @@
 			const char *input, string_t *output,
 			enum charset_result *result_r) ATTR_NULL(2);
 
+/* INTERNAL: */
+enum charset_result
+charset_utf8_to_utf8(normalizer_func_t *normalizer,
+		     const unsigned char *src, size_t *src_size, buffer_t *dest);
+
 #endif