# HG changeset patch
# User Timo Sirainen
# Date 1299259652 -7200
# Node ID 91c605339e45c920d1d26e7c4ffda15fa710c2f0
# Parent fe9a5bb624709a3fa2c5fe1648b1efc72f9512b2
lib-charset: Skip over only invalid characters, not entire buffers around them.

diff -r fe9a5bb62470 -r 91c605339e45 src/lib-charset/charset-iconv.c
--- a/src/lib-charset/charset-iconv.c	Thu Feb 17 08:02:45 2011 +0200
+++ b/src/lib-charset/charset-iconv.c	Fri Mar 04 19:27:32 2011 +0200
@@ -104,10 +104,7 @@
 	else {
 		/* should be EILSEQ */
 		*result = CHARSET_RET_INVALID_INPUT;
-		if (!dtcase)
-			buffer_set_used_size(dest, dest->used - destleft);
-		uni_ucs4_to_utf8_c(UNICODE_REPLACEMENT_CHAR, dest);
-		return TRUE;
+		ret = FALSE;
 	}
 
 	*src_size -= srcleft;
@@ -132,6 +129,7 @@
 	bool dtcase = (t->flags & CHARSET_FLAG_DECOMP_TITLECASE) != 0;
 	enum charset_result result;
 	size_t pos, used, size, prev_pos = 0, prev_used = 0;
+	size_t prev_invalid_pos = (size_t)-1;
 	bool ret;
 
 	for (pos = 0;;) {
@@ -139,12 +137,17 @@
 		ret = charset_to_utf8_try(t, src + pos, &size, dest, &result);
 		pos += size;
 
-		if (ret) {
-			*src_size = pos;
-			return result;
-		}
+		if (ret)
+			break;
 
-		if (!dtcase) {
+		if (result == CHARSET_RET_INVALID_INPUT) {
+			if (prev_invalid_pos != dest->used) {
+				uni_ucs4_to_utf8_c(UNICODE_REPLACEMENT_CHAR,
+						   dest);
+				prev_invalid_pos = dest->used;
+			}
+			pos++;
+		} else if (!dtcase) {
 			/* force buffer to grow */
 			used = dest->used;
 			size = buffer_get_size(dest) - used + 1;
@@ -156,6 +159,12 @@
 			prev_used = dest->used;
 		}
 	}
+
+	if (prev_invalid_pos != (size_t)-1)
+		result = CHARSET_RET_INVALID_INPUT;
+
+	*src_size = pos;
+	return result;
 }
 
 #endif