Mercurial > dovecot > original-hg > dovecot-1.2
comparison src/lib/unichar.h @ 7185:6f014a866f38 HEAD
Replace invalid UTF8 input with a replacement character.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 22 Jan 2008 09:31:59 +0200 |
parents | dcbf6afdf931 |
children | 81806d402514 |
comparison
equal
deleted
inserted
replaced
7184:7416737df8b8 | 7185:6f014a866f38 |
---|---|
1 #ifndef UNICHAR_H | 1 #ifndef UNICHAR_H |
2 #define UNICHAR_H | 2 #define UNICHAR_H |
| | 3 |
| | 4 /* Character used to replace invalid input. */ |
| | 5 #define UNICODE_REPLACEMENT_CHAR 0xfffd |
3 | 6 |
4 typedef uint32_t unichar_t; | 7 typedef uint32_t unichar_t; |
5 ARRAY_DEFINE_TYPE(unichars, unichar_t); | 8 ARRAY_DEFINE_TYPE(unichars, unichar_t); |
6 | 9 |
7 extern const uint8_t *const uni_utf8_non1_bytes; | 10 extern const uint8_t *const uni_utf8_non1_bytes; |
35 /* Return given character in titlecase. */ | 38 /* Return given character in titlecase. */ |
36 unichar_t uni_ucs4_to_titlecase(unichar_t chr); | 39 unichar_t uni_ucs4_to_titlecase(unichar_t chr); |
37 | 40 |
38 /* Convert UTF-8 input to titlecase and decompose the titlecase characters to | 41 /* Convert UTF-8 input to titlecase and decompose the titlecase characters to |
39 output buffer. Returns 0 if ok, -1 if input was invalid. This generates | 42 output buffer. Returns 0 if ok, -1 if input was invalid. This generates |
40 output that's compatible with i;unicode-casemap comparator. */ | 43 output that's compatible with i;unicode-casemap comparator. Invalid input |
| | 44 is replaced with unicode replacement character (0xfffd). */ |
41 int uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len, | 45 int uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len, |
42 buffer_t *output); | 46 buffer_t *output); |
43 | 47 |
44 /* If input contains only valid UTF-8 characters, return TRUE. If input | 48 /* If input contains only valid UTF-8 characters, return TRUE without updating |
45 contains invalid UTF-8 characters, write only the valid ones to buf and | 49 buf. If input contains invalid UTF-8 characters, replace them with unicode |
46 return FALSE. */ | 50 replacement character (0xfffd), write the output to buf and return FALSE. */ |
47 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, | 51 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, |
48 buffer_t *buf); | 52 buffer_t *buf); |
49 | 53 |
50 #endif | 54 #endif |