changeset 9476:bf2fb1679cb4 HEAD

Added uni_utf8_str_is_valid().
author Timo Sirainen <tss@iki.fi>
date Mon, 09 Nov 2009 21:21:55 -0500
parents 76ff6831c9ae
children 96b5d6d8dd2c
files src/lib/unichar.c src/lib/unichar.h
diffstat 2 files changed, 27 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib/unichar.c	Tue Nov 10 19:13:36 2009 -0500
+++ b/src/lib/unichar.c	Mon Nov 09 21:21:55 2009 -0500
@@ -323,8 +323,8 @@
 	return len;
 }
 
-bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
-			     buffer_t *buf)
+static int uni_utf8_find_invalid_pos(const unsigned char *input, size_t size,
+				     size_t *pos_r)
 {
 	size_t i, len;
 
@@ -334,13 +334,24 @@
 			i++;
 		else {
 			len = is_valid_utf8_seq(input + i, size-i);
-			if (unlikely(len == 0))
-				goto broken;
+			if (unlikely(len == 0)) {
+				*pos_r = i;
+				return -1;
+			}
 			i += len;
 		}
 	}
-	return TRUE;
-broken:
+	return 0;
+}
+
+bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
+			     buffer_t *buf)
+{
+	size_t i, len;
+
+	if (uni_utf8_find_invalid_pos(input, size, &i) == 0)
+		return TRUE;
+
 	/* broken utf-8 input - skip the broken characters */
 	buffer_append(buf, input, i++);
 
@@ -362,3 +373,11 @@
 	}
 	return FALSE;
 }
+
+bool uni_utf8_str_is_valid(const char *str) /* TRUE iff no invalid UTF-8 position is found in str */
+{
+	size_t i; /* out-parameter for the invalid position; its value is ignored here */
+
+	return uni_utf8_find_invalid_pos((const unsigned char *)str,
+					 strlen(str), &i) == 0; /* length comes from strlen(), so str must be NUL-terminated */
+}
--- a/src/lib/unichar.h	Tue Nov 10 19:13:36 2009 -0500
+++ b/src/lib/unichar.h	Mon Nov 09 21:21:55 2009 -0500
@@ -69,5 +69,7 @@
    replacement character (0xfffd), write the output to buf and return FALSE. */
 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
 			     buffer_t *buf);
+/* Returns TRUE if string is valid UTF-8 input. */
+bool uni_utf8_str_is_valid(const char *str);
 
 #endif