Mercurial > dovecot > core-2.2
changeset 10294:64df978b2926 HEAD
Added uni_utf8_str_is_valid().
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 09 Nov 2009 21:21:55 -0500 |
parents | d57dcba5dd24 |
children | d52d842e8f81 |
files | src/lib/unichar.c src/lib/unichar.h |
diffstat | 2 files changed, 27 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/lib/unichar.c	Mon Nov 09 21:03:15 2009 -0500
+++ b/src/lib/unichar.c	Mon Nov 09 21:21:55 2009 -0500
@@ -323,8 +323,8 @@
 	return len;
 }
 
-bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
-			     buffer_t *buf)
+static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
+				     size_t *pos_r)
 {
 	size_t i, len;
 
@@ -334,13 +334,24 @@
 			i++;
 		else {
 			len = is_valid_utf8_seq(input + i, size-i);
-			if (unlikely(len == 0))
-				goto broken;
+			if (unlikely(len == 0)) {
+				*pos_r = i;
+				return -1;
+			}
 			i += len;
 		}
 	}
-	return TRUE;
-broken:
+	return 0;
+}
+
+bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
+			     buffer_t *buf)
+{
+	size_t i, len;
+
+	if (uni_utf8_find_invalid_pos(input, size, &i) == 0)
+		return TRUE;
+
 	/* broken utf-8 input - skip the broken characters */
 	buffer_append(buf, input, i++);
 
@@ -362,3 +373,11 @@
 	}
 	return FALSE;
 }
+
+bool uni_utf8_str_is_valid(const char *str)
+{
+	size_t i;
+
+	return uni_utf8_find_invalid_pos((const unsigned char *)str,
+					 strlen(str), &i) == 0;
+}
```
```diff
--- a/src/lib/unichar.h	Mon Nov 09 21:03:15 2009 -0500
+++ b/src/lib/unichar.h	Mon Nov 09 21:21:55 2009 -0500
@@ -69,5 +69,7 @@
    replacement character (0xfffd), write the output to buf and return FALSE. */
 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
 			     buffer_t *buf);
+/* Returns TRUE if string is valid UTF-8 input. */
+bool uni_utf8_str_is_valid(const char *str);
 
 #endif
```