Mercurial > dovecot > original-hg > dovecot-1.2
diff src/lib/unichar.h @ 5683:8101787cdd1c HEAD
Rewrote some code and cleaned up the API
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 11 Jun 2007 04:37:29 +0300 |
parents | c98008a7e9b7 |
children | 04b9eb27283c |
line wrap: on
line diff
--- a/src/lib/unichar.h Mon Jun 11 02:27:55 2007 +0300
+++ b/src/lib/unichar.h Mon Jun 11 04:37:29 2007 +0300
@@ -3,7 +3,7 @@
 
 typedef uint32_t unichar_t;
 
-extern const char *const uni_utf8_skip;
+extern const uint8_t *const uni_utf8_non1_bytes;
 
 /* Returns number of characters in a NUL-terminated unicode string */
 unsigned int uni_strlen(const unichar_t *str);
@@ -12,15 +12,23 @@
 int uni_utf8_to_ucs4(const char *input, buffer_t *output);
 /* Translates UCS-4 input to UTF-8 output. */
 void uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
+void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
 
-/* Returns the next UTF-8 character, or (unichar_t)-1 for invalid input and
-   (unichar_t)-2 for incomplete trailing character. */
-unichar_t uni_utf8_get_char(const char *input);
-unichar_t uni_utf8_get_char_len(const unsigned char *input, size_t max_len);
+/* Returns 1 if *chr_r is set, 0 for incomplete trailing character,
+   -1 for invalid input. */
+int uni_utf8_get_char(const char *input, unichar_t *chr_r);
+int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
 /* Returns UTF-8 string length with maximum input size. */
 unsigned int uni_utf8_strlen_n(const void *input, size_t size);
 
-#define uni_utf8_next_char(p) \
-	((p) + uni_utf8_skip[*(const uint8_t *)(p)])
+/* Returns the number of bytes belonging to this partial UTF-8 character.
+   Invalid input is returned with length 1. */
+static inline unsigned int uni_utf8_char_bytes(char chr)
+{
+	/* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */
+	if ((uint8_t)chr < (192 + 2))
+		return 1;
+	return uni_utf8_non1_bytes[(uint8_t)chr - (192 + 2)];
+}
 
 #endif