Mercurial > dovecot > core-2.2
changeset 6131:5f56b2eb32b3 HEAD
Use uni_utf8_to_decomposed_titlecase() for proper case-insensitive UTF-8
comparison.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 17:27:02 +0300 |
parents | 9afe3fa4858d |
children | d01522d276f6 |
files | src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.c src/lib-charset/charset-utf8.h src/lib-mail/message-decoder.c src/lib-mail/message-header-decode.c |
diffstat | 5 files changed, 44 insertions(+), 63 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-charset/charset-iconv.c Fri Jul 20 17:26:04 2007 +0300 +++ b/src/lib-charset/charset-iconv.c Fri Jul 20 17:27:02 2007 +0300 @@ -1,7 +1,8 @@ -/* Copyright (C) 2002 Timo Sirainen */ +/* Copyright (C) 2002-2007 Timo Sirainen */ #include "lib.h" #include "buffer.h" +#include "unichar.h" #include "charset-utf8.h" #ifdef HAVE_ICONV @@ -63,32 +64,37 @@ enum charset_result *result) { ICONV_CONST char *ic_srcbuf; - char *ic_destbuf; - size_t srcleft, destpos, destleft, size; + char tmpbuf[8192], *ic_destbuf; + size_t srcleft, destleft; bool ret = TRUE; - destpos = dest->used; if (t->cd == (iconv_t)-1) { /* no translation needed - just copy it to outbuf uppercased */ - if (t->ucase) - charset_utf8_ucase_write(dest, destpos, src, *src_size); - else + *result = CHARSET_RET_OK; + if (!t->ucase) { buffer_append(dest, src, *src_size); + return TRUE; + } - *result = CHARSET_RET_OK; + if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0) + *result = CHARSET_RET_INVALID_INPUT; return TRUE; } - destleft = buffer_get_size(dest) - destpos; - if (destleft < *src_size) { - /* The buffer is most likely too small to hold the output, - so increase it at least to the input size. */ - destleft = *src_size; + if (!t->ucase) { + destleft = buffer_get_size(dest) - dest->used; + if (destleft < *src_size) { + /* The buffer is most likely too small to hold the + output, so increase it at least to the input size. 
*/ + destleft = *src_size; + } + ic_destbuf = buffer_append_space_unsafe(dest, destleft); + } else { + destleft = sizeof(tmpbuf); + ic_destbuf = tmpbuf; } - size = destleft; srcleft = *src_size; ic_srcbuf = (ICONV_CONST char *) src; - ic_destbuf = buffer_append_space_unsafe(dest, destleft); if (iconv(t->cd, &ic_srcbuf, &srcleft, &ic_destbuf, &destleft) != (size_t)-1) @@ -104,16 +110,17 @@ *result = CHARSET_RET_INVALID_INPUT; return TRUE; } - size -= destleft; - - /* give back the memory we didn't use */ - buffer_set_used_size(dest, dest->used - destleft); + *src_size -= srcleft; - *src_size -= srcleft; - if (t->ucase) { - charset_utf8_ucase_write(dest, destpos, - (unsigned char *)ic_destbuf - size, - size); + if (!t->ucase) { + /* give back the memory we didn't use */ + buffer_set_used_size(dest, dest->used - destleft); + } else { + size_t tmpsize = sizeof(tmpbuf) - destleft; + + /* we just converted data to UTF-8, it can't be invalid */ + if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0) + i_unreached(); } return ret; }
--- a/src/lib-charset/charset-utf8.c Fri Jul 20 17:26:04 2007 +0300 +++ b/src/lib-charset/charset-utf8.c Fri Jul 20 17:27:02 2007 +0300 @@ -14,31 +14,6 @@ strcasecmp(charset, "UTF8") == 0; } -void charset_utf8_ucase_write(buffer_t *dest, size_t destpos, - const unsigned char *src, size_t src_size) -{ - char *destbuf; - size_t i; - - destbuf = buffer_get_space_unsafe(dest, destpos, src_size); - for (i = 0; i < src_size; i++) - destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */ -} - -const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size, - size_t *utf8_size_r) -{ - buffer_t *dest; - - dest = buffer_create_dynamic(pool_datastack_create(), size); - charset_utf8_ucase_write(dest, 0, data, size); - if (utf8_size_r != NULL) - *utf8_size_r = buffer_get_used_size(dest); - buffer_append_c(dest, '\0'); - return buffer_free_without_data(dest); -} - - #ifndef HAVE_ICONV #include <ctype.h> @@ -82,10 +57,12 @@ charset_to_utf8(struct charset_translation *t, const unsigned char *src, size_t *src_size, buffer_t *dest) { - if (t == &utf8_translation_uc || t == &ascii_translation_uc) - charset_utf8_ucase_write(dest, dest->used, src, *src_size); - else + if (t != &utf8_translation_uc && t != &ascii_translation_uc) { buffer_append(dest, src, *src_size); + return CHARSET_RET_OK; + } + if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0) + return CHARSET_RET_INVALID_INPUT; return CHARSET_RET_OK; }
--- a/src/lib-charset/charset-utf8.h Fri Jul 20 17:26:04 2007 +0300 +++ b/src/lib-charset/charset-utf8.h Fri Jul 20 17:27:02 2007 +0300 @@ -24,9 +24,4 @@ charset_to_utf8(struct charset_translation *t, const unsigned char *src, size_t *src_size, buffer_t *dest); -void charset_utf8_ucase_write(buffer_t *dest, size_t destpos, - const unsigned char *src, size_t src_size); -const char *charset_utf8_ucase_strdup(const unsigned char *data, size_t size, - size_t *utf8_size_r); - #endif
--- a/src/lib-mail/message-decoder.c Fri Jul 20 17:26:04 2007 +0300 +++ b/src/lib-mail/message-decoder.c Fri Jul 20 17:27:02 2007 +0300 @@ -4,6 +4,7 @@ #include "buffer.h" #include "base64.h" #include "str.h" +#include "unichar.h" #include "charset-utf8.h" #include "quoted-printable.h" #include "rfc822-parser.h" @@ -159,9 +160,8 @@ value_len = ctx->buf->used; if (ctx->ucase) { - charset_utf8_ucase_write(ctx->buf, ctx->buf->used, - (const unsigned char *)hdr->name, - hdr->name_len); + (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len, + ctx->buf); buffer_append_c(ctx->buf, '\0'); } @@ -300,7 +300,8 @@ if (ctx->charset_utf8) { if (ctx->ucase) { buffer_set_used_size(ctx->buf2, 0); - charset_utf8_ucase_write(ctx->buf2, 0, data, size); + (void)uni_utf8_to_decomposed_titlecase(data, size, + ctx->buf); output->data = ctx->buf2->data; output->size = ctx->buf2->used; } else {
--- a/src/lib-mail/message-header-decode.c Fri Jul 20 17:26:04 2007 +0300 +++ b/src/lib-mail/message-header-decode.c Fri Jul 20 17:27:02 2007 +0300 @@ -3,6 +3,7 @@ #include "lib.h" #include "base64.h" #include "buffer.h" +#include "unichar.h" #include "charset-utf8.h" #include "quoted-printable.h" #include "message-header-decode.h" @@ -139,8 +140,8 @@ if (charset == NULL || charset_is_utf8(charset)) { /* ASCII / UTF-8 */ if (ctx->ucase) { - charset_utf8_ucase_write(ctx->dest, ctx->dest->used, - data, size); + (void)uni_utf8_to_decomposed_titlecase(data, size, + ctx->dest); } else { buffer_append(ctx->dest, data, size); }