comparison src/lib/unichar.h @ 7185:6f014a866f38 HEAD

Replace invalid UTF8 input with a replacement character.
author Timo Sirainen <tss@iki.fi>
date Tue, 22 Jan 2008 09:31:59 +0200
parents dcbf6afdf931
children 81806d402514
comparison
equal deleted inserted replaced
7184:7416737df8b8 7185:6f014a866f38
1 #ifndef UNICHAR_H 1 #ifndef UNICHAR_H
2 #define UNICHAR_H 2 #define UNICHAR_H
3
4 /* Character used to replace invalid input. */
5 #define UNICODE_REPLACEMENT_CHAR 0xfffd
3 6
4 typedef uint32_t unichar_t; 7 typedef uint32_t unichar_t;
5 ARRAY_DEFINE_TYPE(unichars, unichar_t); 8 ARRAY_DEFINE_TYPE(unichars, unichar_t);
6 9
7 extern const uint8_t *const uni_utf8_non1_bytes; 10 extern const uint8_t *const uni_utf8_non1_bytes;
35 /* Return given character in titlecase. */ 38 /* Return given character in titlecase. */
36 unichar_t uni_ucs4_to_titlecase(unichar_t chr); 39 unichar_t uni_ucs4_to_titlecase(unichar_t chr);
37 40
38 /* Convert UTF-8 input to titlecase and decompose the titlecase characters to 41 /* Convert UTF-8 input to titlecase and decompose the titlecase characters to
39 output buffer. Returns 0 if ok, -1 if input was invalid. This generates 42 output buffer. Returns 0 if ok, -1 if input was invalid. This generates
40 output that's compatible with i;unicode-casemap comparator. */ 43 output that's compatible with i;unicode-casemap comparator. Invalid input
44 is replaced with the Unicode replacement character (0xfffd). */
41 int uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len, 45 int uni_utf8_to_decomposed_titlecase(const void *input, size_t max_len,
42 buffer_t *output); 46 buffer_t *output);
43 47
44 /* If input contains only valid UTF-8 characters, return TRUE. If input 48 /* If input contains only valid UTF-8 characters, return TRUE without updating
45 contains invalid UTF-8 characters, write only the valid ones to buf and 49 buf. If input contains invalid UTF-8 characters, replace them with the Unicode
46 return FALSE. */ 50 replacement character (0xfffd), write the output to buf and return FALSE. */
47 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size, 51 bool uni_utf8_get_valid_data(const unsigned char *input, size_t size,
48 buffer_t *buf); 52 buffer_t *buf);
49 53
50 #endif 54 #endif