Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib/unichar.h @ 6129:04b9eb27283c HEAD
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
use a unicharmap.c file generated from UnicodeData.txt.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 17:25:16 +0300 |
parents | 8101787cdd1c |
children | e4eb71ae8e96 |
rev | line source |
---|---|
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 #ifndef __UNICHAR_H |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 #define __UNICHAR_H |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
4 typedef uint32_t unichar_t; |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
5 |
5683
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
6 extern const uint8_t *const uni_utf8_non1_bytes; |
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
7 |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
8 /* Returns number of characters in a NUL-terminated Unicode string */ |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
9 unsigned int uni_strlen(const unichar_t *str); |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
10 /* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
11 invalid */ |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
12 int uni_utf8_to_ucs4(const char *input, buffer_t *output); |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
13 /* Translates UCS-4 input to UTF-8 output. */ |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
14 void uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output); |
5683
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
15 void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output); |
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
16 |
5683
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
17 /* Returns 1 if *chr_r is set, 0 for incomplete trailing character, |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
18 -1 for invalid input. */ |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
19 int uni_utf8_get_char(const char *input, unichar_t *chr_r); |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
20 int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r); |
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
21 /* Returns UTF-8 string length with maximum input size. */ |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
22 unsigned int uni_utf8_strlen_n(const void *input, size_t size); |
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
23 |
5683
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
24 /* Returns the number of bytes belonging to this partial UTF-8 character. |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
25 Invalid input is returned with length 1. */ |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
26 static inline unsigned int uni_utf8_char_bytes(char chr) |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
27 { |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
28 /* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid. */ |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
29 if ((uint8_t)chr < (192 + 2)) |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
30 return 1; |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
31 return uni_utf8_non1_bytes[(uint8_t)chr - (192 + 2)]; |
8101787cdd1c
Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents:
4899
diff
changeset
|
32 } |
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
33 |
6129
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
34 /* Return given character in titlecase. */ |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
35 unichar_t uni_ucs4_to_titlecase(unichar_t chr); |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
36 |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
37 /* Convert UTF-8 input to titlecase and decompose the titlecase characters to |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
38 output buffer. Returns 0 if ok, -1 if input was invalid. This generates |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
39 output that's compatible with i;unicode-casemap comparator. */ |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
40 int uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len, |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
41 buffer_t *output); |
04b9eb27283c
Added uni_ucs4_to_titlecase() and uni_utf8_to_decomposed_titlecase(). They
Timo Sirainen <tss@iki.fi>
parents:
5683
diff
changeset
|
42 |
4899
c98008a7e9b7
Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
43 #endif |