annotate src/lib/unichar.h @ 5683:8101787cdd1c HEAD

Rewrote some code and cleaned up the API
author Timo Sirainen <tss@iki.fi>
date Mon, 11 Jun 2007 04:37:29 +0300
parents c98008a7e9b7
children 04b9eb27283c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4899
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 #ifndef __UNICHAR_H
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2 #define __UNICHAR_H
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
4 typedef uint32_t unichar_t;
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5
5683
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
6 extern const uint8_t *const uni_utf8_non1_bytes;
4899
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
7
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8 /* Returns the number of characters in a NUL-terminated UCS-4 (unichar_t) string */
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 unsigned int uni_strlen(const unichar_t *str);
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
10 /* Translates UTF-8 input to UCS-4 output. Returns 0 if ok, -1 if input was
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
11 invalid */
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
12 int uni_utf8_to_ucs4(const char *input, buffer_t *output);
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
13 /* Translates UCS-4 input to UTF-8 output. */
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
14 void uni_ucs4_to_utf8(const unichar_t *input, size_t len, buffer_t *output);
5683
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
15 void uni_ucs4_to_utf8_c(unichar_t chr, buffer_t *output);
4899
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
16
5683
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
17 /* Returns 1 if *chr_r is set, 0 for incomplete trailing character,
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
18 -1 for invalid input. */
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
19 int uni_utf8_get_char(const char *input, unichar_t *chr_r);
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
20 int uni_utf8_get_char_n(const void *input, size_t max_len, unichar_t *chr_r);
4899
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
21 /* Returns the length of a UTF-8 string, examining at most size bytes of input. */
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
22 unsigned int uni_utf8_strlen_n(const void *input, size_t size);
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
23
5683
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
24 /* Returns the number of bytes belonging to the UTF-8 character that begins with the byte chr (i.e. chr is a partial character).
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
25 Invalid input is returned with length 1. Byte values below 0xC2 always yield 1; larger values are looked up in the uni_utf8_non1_bytes table, which is defined outside this header. */
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
26 static inline unsigned int uni_utf8_char_bytes(char chr)
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
27 {
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
28 /* 0x00 .. 0x7f are ASCII. 0x80 .. 0xC1 are invalid as the first byte of a character. 192 + 2 == 0xC2 is the lowest byte value handled by the lookup table; the uint8_t cast keeps the comparison correct when char is signed. */
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
29 if ((uint8_t)chr < (192 + 2))
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
30 return 1;
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
31 return uni_utf8_non1_bytes[(uint8_t)chr - (192 + 2)]; /* table index 0 corresponds to byte value 0xC2 */
8101787cdd1c Rewrote some code and cleaned up the API
Timo Sirainen <tss@iki.fi>
parents: 4899
diff changeset
32 }
4899
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
33
c98008a7e9b7 Added unichar_t UCS-4 type and some ucs4/utf8 functions.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
34 #endif