# HG changeset patch # User Timo Sirainen # Date 1257819715 18000 # Node ID bf2fb1679cb436706575b52b6af94836cc2efd95 # Parent 76ff6831c9ae1ad8f9f80da87db7c5dacaf9193b Added uni_utf8_str_is_valid(). diff -r 76ff6831c9ae -r bf2fb1679cb4 src/lib/unichar.c --- a/src/lib/unichar.c Tue Nov 10 19:13:36 2009 -0500 +++ b/src/lib/unichar.c Mon Nov 09 21:21:55 2009 -0500 @@ -323,8 +323,8 @@ return len; } -bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, - buffer_t *buf) +static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size, + size_t *pos_r) { size_t i, len; @@ -334,13 +334,24 @@ i++; else { len = is_valid_utf8_seq(input + i, size-i); - if (unlikely(len == 0)) - goto broken; + if (unlikely(len == 0)) { + *pos_r = i; + return -1; + } i += len; } } - return TRUE; -broken: + return 0; +} + +bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, + buffer_t *buf) +{ + size_t i, len; + + if (uni_utf8_find_invalid_pos(input, size, &i) == 0) + return TRUE; + /* broken utf-8 input - skip the broken characters */ buffer_append(buf, input, i++); @@ -362,3 +373,11 @@ } return FALSE; } + +bool uni_utf8_str_is_valid(const char *str) +{ + size_t i; + + return uni_utf8_find_invalid_pos((const unsigned char *)str, + strlen(str), &i) == 0; +} diff -r 76ff6831c9ae -r bf2fb1679cb4 src/lib/unichar.h --- a/src/lib/unichar.h Tue Nov 10 19:13:36 2009 -0500 +++ b/src/lib/unichar.h Mon Nov 09 21:21:55 2009 -0500 @@ -69,5 +69,7 @@ replacement character (0xfffd), write the output to buf and return FALSE. */ bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, buffer_t *buf); +/* Returns TRUE if string is valid UTF-8 input. */ +bool uni_utf8_str_is_valid(const char *str); #endif