changeset 4605:e6cb9f75b76a HEAD

Added charset_is_utf8() and charset_to_ucase_utf8_full().
author Timo Sirainen <tss@iki.fi>
date Sat, 16 Sep 2006 16:50:21 +0300
parents 2415f43b1dcf
children baaf62ced3d0
files src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h
diffstat 3 files changed, 47 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-iconv.c	Sat Sep 16 16:50:21 2006 +0300
@@ -113,6 +113,32 @@
 	return ret;
 }
 
+enum charset_result
+charset_to_ucase_utf8_full(struct charset_translation *t,
+			   const unsigned char *src, size_t *src_size,
+			   buffer_t *dest)
+{
+	enum charset_result ret;
+	size_t pos, used, size;
+
+	for (pos = 0;;) { /* loop while result is OUTPUT_FULL */
+		size = *src_size - pos;
+		ret = charset_to_ucase_utf8(t, src + pos, &size, dest);
+		pos += size; /* callee updated size to what it translated */
+
+		if (ret != CHARSET_RET_OUTPUT_FULL) {
+			*src_size = pos; /* total translated over all rounds */
+			return ret;
+		}
+
+		/* force buffer to grow; used size is restored right after */
+		used = dest->used;
+		size = buffer_get_size(dest) - used + 1; /* presumably free space + 1 */
+		(void)buffer_append_space_unsafe(dest, size);
+		buffer_set_used_size(dest, used); /* keep only the grown capacity */
+	}
+}
+
 static const char *
 charset_to_utf8_string_int(const char *charset, bool *unknown_charset,
 			   const unsigned char *data, size_t size,
@@ -123,10 +149,7 @@
 	char *outbuf, *outpos;
 	size_t inleft, outleft, outsize, pos;
 
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
-	    strcasecmp(charset, "ascii") == 0 ||
-	    strcasecmp(charset, "UTF-8") == 0 ||
-	    strcasecmp(charset, "UTF8") == 0) {
+	if (charset == NULL || charset_is_utf8(charset)) {
 		if (unknown_charset != NULL)
 			*unknown_charset = FALSE;
 
--- a/src/lib-charset/charset-utf8.c	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-utf8.c	Sat Sep 16 16:50:21 2006 +0300
@@ -6,6 +6,14 @@
 
 #include <ctype.h>
 
+bool charset_is_utf8(const char *charset) /* also TRUE for ASCII names */
+{ /* no NULL check here; callers visible in this change test NULL first */
+	return strcasecmp(charset, "us-ascii") == 0 ||
+		strcasecmp(charset, "ascii") == 0 ||
+		strcasecmp(charset, "UTF-8") == 0 ||
+		strcasecmp(charset, "UTF8") == 0;
+}
+
 void _charset_utf8_ucase(const unsigned char *src, size_t src_size,
 			 buffer_t *dest, size_t destpos)
 {
@@ -112,10 +120,7 @@
 			     const unsigned char *data, size_t size,
 			     size_t *utf8_size_r)
 {
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
-	    strcasecmp(charset, "ascii") == 0 ||
-	    strcasecmp(charset, "UTF-8") == 0 ||
-	    strcasecmp(charset, "UTF8") == 0) {
+	if (charset == NULL || charset_is_utf8(charset)) {
 		if (unknown_charset != NULL)
 			*unknown_charset = FALSE;
 		return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
--- a/src/lib-charset/charset-utf8.h	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-utf8.h	Sat Sep 16 16:50:21 2006 +0300
@@ -16,12 +16,21 @@
 
 void charset_to_utf8_reset(struct charset_translation *t);
 
-/* Translate src to UTF-8. If src_size is updated to contain the number of
-   characters actually translated from src. */
+/* Returns TRUE if charset is UTF-8 or ASCII */
+bool charset_is_utf8(const char *charset);
+
+/* Translate src to UTF-8. src_size is updated to contain the number of
+   characters actually translated from src. Note that the dest buffer is
+   used only up to its current size; to grow it automatically, use
+   charset_to_ucase_utf8_full(). */
 enum charset_result
 charset_to_ucase_utf8(struct charset_translation *t,
 		      const unsigned char *src, size_t *src_size,
 		      buffer_t *dest);
+enum charset_result
+charset_to_ucase_utf8_full(struct charset_translation *t,
+			   const unsigned char *src, size_t *src_size,
+			   buffer_t *dest);
 
 /* Simple wrappers for above functions. If utf8_size is non-NULL, it's set
    to same as strlen(returned data). */