changeset 6131:5f56b2eb32b3 HEAD

Use uni_utf8_to_decomposed_titlecase() for proper case-insensitive UTF-8 comparison.
author Timo Sirainen <tss@iki.fi>
date Fri, 20 Jul 2007 17:27:02 +0300
parents 9afe3fa4858d
children d01522d276f6
files src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h src/lib-mail/message-decoder.c src/lib-mail/message-header-decode.c
diffstat 5 files changed, 44 insertions(+), 63 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-iconv.c	Fri Jul 20 17:27:02 2007 +0300
@@ -1,7 +1,8 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2007 Timo Sirainen */
 
 #include "lib.h"
 #include "buffer.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 
 #ifdef HAVE_ICONV
@@ -63,32 +64,37 @@
 		    enum charset_result *result)
 {
 	ICONV_CONST char *ic_srcbuf;
-	char *ic_destbuf;
-	size_t srcleft, destpos, destleft, size;
+	char tmpbuf[8192], *ic_destbuf;
+	size_t srcleft, destleft;
 	bool ret = TRUE;
 
-	destpos = dest->used;
 	if (t->cd == (iconv_t)-1) {
 		/* no translation needed - just copy it to outbuf uppercased */
-		if (t->ucase)
-			charset_utf8_ucase_write(dest, destpos, src, *src_size);
-		else
+		*result = CHARSET_RET_OK;
+		if (!t->ucase) {
 			buffer_append(dest, src, *src_size);
+			return TRUE;
+		}
 
-		*result = CHARSET_RET_OK;
+		if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+			*result = CHARSET_RET_INVALID_INPUT;
 		return TRUE;
 	}
-	destleft = buffer_get_size(dest) - destpos;
-	if (destleft < *src_size) {
-		/* The buffer is most likely too small to hold the output,
-		   so increase it at least to the input size. */
-		destleft = *src_size;
+	if (!t->ucase) {
+		destleft = buffer_get_size(dest) - dest->used;
+		if (destleft < *src_size) {
+			/* The buffer is most likely too small to hold the
+			   output, so increase it at least to the input size. */
+			destleft = *src_size;
+		}
+		ic_destbuf = buffer_append_space_unsafe(dest, destleft);
+	} else {
+		destleft = sizeof(tmpbuf);
+		ic_destbuf = tmpbuf;
 	}
 
-	size = destleft;
 	srcleft = *src_size;
 	ic_srcbuf = (ICONV_CONST char *) src;
-	ic_destbuf = buffer_append_space_unsafe(dest, destleft);
 
 	if (iconv(t->cd, &ic_srcbuf, &srcleft,
 		  &ic_destbuf, &destleft) != (size_t)-1)
@@ -104,16 +110,17 @@
 		*result = CHARSET_RET_INVALID_INPUT;
 		return TRUE;
 	}
-	size -= destleft;
-
-	/* give back the memory we didn't use */
-	buffer_set_used_size(dest, dest->used - destleft);
+	*src_size -= srcleft;
 
-	*src_size -= srcleft;
-	if (t->ucase) {
-		charset_utf8_ucase_write(dest, destpos,
-					 (unsigned char *)ic_destbuf - size,
-					 size);
+	if (!t->ucase) {
+		/* give back the memory we didn't use */
+		buffer_set_used_size(dest, dest->used - destleft);
+	} else {
+		size_t tmpsize = sizeof(tmpbuf) - destleft;
+
+		/* we just converted data to UTF-8, it can't be invalid */
+		if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0)
+			i_unreached();
 	}
 	return ret;
 }
--- a/src/lib-charset/charset-utf8.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.c	Fri Jul 20 17:27:02 2007 +0300
@@ -14,31 +14,6 @@
 		strcasecmp(charset, "UTF8") == 0;
 }
 
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
-			      const unsigned char *src, size_t src_size)
-{
-	char *destbuf;
-	size_t i;
-
-	destbuf = buffer_get_space_unsafe(dest, destpos, src_size);
-	for (i = 0; i < src_size; i++)
-		destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */
-}
-
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
-				      size_t *utf8_size_r)
-{
-	buffer_t *dest;
-
-	dest = buffer_create_dynamic(pool_datastack_create(), size);
-	charset_utf8_ucase_write(dest, 0, data, size);
-	if (utf8_size_r != NULL)
-		*utf8_size_r = buffer_get_used_size(dest);
-	buffer_append_c(dest, '\0');
-	return buffer_free_without_data(dest);
-}
-
-
 #ifndef HAVE_ICONV
 
 #include <ctype.h>
@@ -82,10 +57,12 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest)
 {
-	if (t == &utf8_translation_uc || t == &ascii_translation_uc)
-		charset_utf8_ucase_write(dest, dest->used, src, *src_size);
-	else
+	if (t != &utf8_translation_uc && t != &ascii_translation_uc) {
 		buffer_append(dest, src, *src_size);
+		return CHARSET_RET_OK;
+	}
+	if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
+		return CHARSET_RET_INVALID_INPUT;
 	return CHARSET_RET_OK;
 }
 
--- a/src/lib-charset/charset-utf8.h	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-charset/charset-utf8.h	Fri Jul 20 17:27:02 2007 +0300
@@ -24,9 +24,4 @@
 charset_to_utf8(struct charset_translation *t,
 		const unsigned char *src, size_t *src_size, buffer_t *dest);
 
-void charset_utf8_ucase_write(buffer_t *dest, size_t destpos,
-			      const unsigned char *src, size_t src_size);
-const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
-				      size_t *utf8_size_r);
-
 #endif
--- a/src/lib-mail/message-decoder.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-decoder.c	Fri Jul 20 17:27:02 2007 +0300
@@ -4,6 +4,7 @@
 #include "buffer.h"
 #include "base64.h"
 #include "str.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 #include "quoted-printable.h"
 #include "rfc822-parser.h"
@@ -159,9 +160,8 @@
 	value_len = ctx->buf->used;
 
 	if (ctx->ucase) {
-		charset_utf8_ucase_write(ctx->buf, ctx->buf->used,
-					 (const unsigned char *)hdr->name,
-					 hdr->name_len);
+		(void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len,
+						       ctx->buf);
 		buffer_append_c(ctx->buf, '\0');
 	}
 
@@ -300,7 +300,8 @@
 	if (ctx->charset_utf8) {
 		if (ctx->ucase) {
 			buffer_set_used_size(ctx->buf2, 0);
-			charset_utf8_ucase_write(ctx->buf2, 0, data, size);
+			(void)uni_utf8_to_decomposed_titlecase(data, size,
+							       ctx->buf2);
 			output->data = ctx->buf2->data;
 			output->size = ctx->buf2->used;
 		} else {
--- a/src/lib-mail/message-header-decode.c	Fri Jul 20 17:26:04 2007 +0300
+++ b/src/lib-mail/message-header-decode.c	Fri Jul 20 17:27:02 2007 +0300
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "base64.h"
 #include "buffer.h"
+#include "unichar.h"
 #include "charset-utf8.h"
 #include "quoted-printable.h"
 #include "message-header-decode.h"
@@ -139,8 +140,8 @@
 	if (charset == NULL || charset_is_utf8(charset)) {
 		/* ASCII / UTF-8 */
 		if (ctx->ucase) {
-			charset_utf8_ucase_write(ctx->dest, ctx->dest->used,
-						 data, size);
+			(void)uni_utf8_to_decomposed_titlecase(data, size,
+							       ctx->dest);
 		} else {
 			buffer_append(ctx->dest, data, size);
 		}