# HG changeset patch
# User Timo Sirainen
# Date 1158414621 -10800
# Node ID e6cb9f75b76a9e1f8e3d85fd93fb12b940cefabe
# Parent 2415f43b1dcfc290796f0593cf8e2bc9c11a1c0f
Added charset_is_utf8() and charset_to_ucase_utf8_full().

diff -r 2415f43b1dcf -r e6cb9f75b76a src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-iconv.c	Sat Sep 16 16:50:21 2006 +0300
@@ -113,6 +113,32 @@
 	return ret;
 }
 
+enum charset_result
+charset_to_ucase_utf8_full(struct charset_translation *t,
+			   const unsigned char *src, size_t *src_size,
+			   buffer_t *dest)
+{
+	enum charset_result ret;
+	size_t pos, used, size;
+
+	for (pos = 0;;) {
+		size = *src_size - pos;
+		ret = charset_to_ucase_utf8(t, src + pos, &size, dest);
+		pos += size;
+
+		if (ret != CHARSET_RET_OUTPUT_FULL) {
+			*src_size = pos;
+			return ret;
+		}
+
+		/* force buffer to grow */
+		used = dest->used;
+		size = buffer_get_size(dest) - used + 1;
+		(void)buffer_append_space_unsafe(dest, size);
+		buffer_set_used_size(dest, used);
+	}
+}
+
 static const char *
 charset_to_utf8_string_int(const char *charset, bool *unknown_charset,
 			   const unsigned char *data, size_t size,
@@ -123,10 +149,7 @@
 	char *outbuf, *outpos;
 	size_t inleft, outleft, outsize, pos;
 
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
-	    strcasecmp(charset, "ascii") == 0 ||
-	    strcasecmp(charset, "UTF-8") == 0 ||
-	    strcasecmp(charset, "UTF8") == 0) {
+	if (charset == NULL || charset_is_utf8(charset)) {
 		if (unknown_charset != NULL)
 			*unknown_charset = FALSE;
 
diff -r 2415f43b1dcf -r e6cb9f75b76a src/lib-charset/charset-utf8.c
--- a/src/lib-charset/charset-utf8.c	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-utf8.c	Sat Sep 16 16:50:21 2006 +0300
@@ -6,6 +6,14 @@
 
 #include
 
+bool charset_is_utf8(const char *charset)
+{
+	return strcasecmp(charset, "us-ascii") == 0 ||
+		strcasecmp(charset, "ascii") == 0 ||
+		strcasecmp(charset, "UTF-8") == 0 ||
+		strcasecmp(charset, "UTF8") == 0;
+}
+
 void _charset_utf8_ucase(const unsigned char *src, size_t src_size,
 			 buffer_t *dest, size_t destpos)
 {
@@ -112,10 +120,7 @@
 			      const unsigned char *data, size_t size,
 			      size_t *utf8_size_r)
 {
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
-	    strcasecmp(charset, "ascii") == 0 ||
-	    strcasecmp(charset, "UTF-8") == 0 ||
-	    strcasecmp(charset, "UTF8") == 0) {
+	if (charset == NULL || charset_is_utf8(charset)) {
 		if (unknown_charset != NULL)
 			*unknown_charset = FALSE;
 		return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
diff -r 2415f43b1dcf -r e6cb9f75b76a src/lib-charset/charset-utf8.h
--- a/src/lib-charset/charset-utf8.h	Sat Sep 16 16:48:05 2006 +0300
+++ b/src/lib-charset/charset-utf8.h	Sat Sep 16 16:50:21 2006 +0300
@@ -16,12 +16,21 @@
 
 void charset_to_utf8_reset(struct charset_translation *t);
 
-/* Translate src to UTF-8. If src_size is updated to contain the number of
-   characters actually translated from src. */
+/* Returns TRUE if charset is UTF-8 or ASCII */
+bool charset_is_utf8(const char *charset);
+
+/* Translate src to UTF-8. src_size is updated to contain the number of
+   characters actually translated from src. Note that dest buffer is used
+   only up to its current size, for growing it automatically use
+   charset_to_ucase_utf8_full(). */
 enum charset_result
 charset_to_ucase_utf8(struct charset_translation *t,
 		      const unsigned char *src, size_t *src_size,
 		      buffer_t *dest);
+enum charset_result
+charset_to_ucase_utf8_full(struct charset_translation *t,
+			   const unsigned char *src, size_t *src_size,
+			   buffer_t *dest);
 
 /* Simple wrappers for above functions. If utf8_size is non-NULL, it's set
    to same as strlen(returned data). */