diff src/lib/unichar.h @ 5683:8101787cdd1c HEAD

Rewrote some code and cleaned up the API
author Timo Sirainen <tss@iki.fi>
date Mon, 11 Jun 2007 04:37:29 +0300
parents c98008a7e9b7
children 04b9eb27283c
line wrap: on
line diff
--- a/src/lib/unichar.h	Mon Jun 11 02:27:55 2007 +0300
+++ b/src/lib/unichar.h	Mon Jun 11 04:37:29 2007 +0300
@@ -3,7 +3,7 @@
 
 typedef uint32_t unichar_t;
 
-extern const char *const uni_utf8_skip;
+extern const uint8_t *const uni_utf8_non1_bytes;
 
 /* Returns number of characters in a NUL-terminated unicode string */
 unsigned int uni_strlen(const unichar_t *str);
@@ -12,15 +12,23 @@
 int uni_utf8_to_ucs4(const char *input, buffer_t *output);
 /* Translates UCS-4 input to UTF-8 output. */
 void uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
+void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
 
-/* Returns the next UTF-8 character, or (unichar_t)-1 for invalid input and
-   (unichar_t)-2 for incomplete trailing character. */
-unichar_t uni_utf8_get_char(const char *input);
-unichar_t uni_utf8_get_char_len(const unsigned char *input, size_t max_len);
+/* Returns 1 if *chr_r is set, 0 for incomplete trailing character,
+   -1 for invalid input. */
+int uni_utf8_get_char(const char *input, unichar_t *chr_r);
+int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
 /* Returns UTF-8 string length with maximum input size. */
 unsigned int uni_utf8_strlen_n(const void *input, size_t size);
 
-#define uni_utf8_next_char(p) \
-	((p) + uni_utf8_skip[*(const uint8_t *)(p)])
+/* Returns the length in bytes of the UTF-8 character that starts with the
+   given byte. Invalid input is returned with length 1. */
+static inline unsigned int uni_utf8_char_bytes(char chr)
+{
+	const uint8_t byte = (uint8_t)chr;
+
+	/* Below 0xC2: ASCII (0x00..0x7f) or invalid (0x80..0xC1) => 1 byte. */
+	return byte < 0xC2 ? 1 : uni_utf8_non1_bytes[byte - 0xC2];
+}
 
 #endif