comparison src/lib/unichar.c @ 9603:5efba9f9f0a7 HEAD

Added a global utf8_replacement_char variable.
author Timo Sirainen <tss@iki.fi>
date Fri, 20 Aug 2010 20:37:31 +0100
parents cc7aa7a4dd6d
children
comparison
legend: equal | deleted | inserted | replaced
9602:cc7aa7a4dd6d 9603:5efba9f9f0a7
7 7
8 #include "unicodemap.c" 8 #include "unicodemap.c"
9 9
10 #define HANGUL_FIRST 0xac00 10 #define HANGUL_FIRST 0xac00
11 #define HANGUL_LAST 0xd7a3 11 #define HANGUL_LAST 0xd7a3
12
13 const unsigned char utf8_replacement_char[UTF8_REPLACEMENT_CHAR_LEN] =
14 { 0xef, 0xbf, 0xbd }; /* 0xfffd */
12 15
13 static const uint8_t utf8_non1_bytes[256 - 192 - 2] = { 16 static const uint8_t utf8_non1_bytes[256 - 192 - 2] = {
14 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 17 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
15 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 18 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
16 }; 19 };
260 return TRUE; 263 return TRUE;
261 } 264 }
262 265
263 static void output_add_replacement_char(buffer_t *output) 266 static void output_add_replacement_char(buffer_t *output)
264 { 267 {
265 /* 0xfffd */ 268 if (output->used >= UTF8_REPLACEMENT_CHAR_LEN &&
266 static const unsigned char replacement_utf8[] = { 0xef, 0xbf, 0xbd };
267 #define REPLACEMENT_UTF8_LEN 3
268
269 if (output->used >= REPLACEMENT_UTF8_LEN &&
270 memcmp(CONST_PTR_OFFSET(output->data, 269 memcmp(CONST_PTR_OFFSET(output->data,
271 output->used - REPLACEMENT_UTF8_LEN), 270 output->used - UTF8_REPLACEMENT_CHAR_LEN),
272 replacement_utf8, REPLACEMENT_UTF8_LEN) == 0) { 271 utf8_replacement_char, UTF8_REPLACEMENT_CHAR_LEN) == 0) {
273 /* don't add the replacement char multiple times */ 272 /* don't add the replacement char multiple times */
274 return; 273 return;
275 } 274 }
276 buffer_append(output, replacement_utf8, REPLACEMENT_UTF8_LEN); 275 buffer_append(output, utf8_replacement_char, UTF8_REPLACEMENT_CHAR_LEN);
277 } 276 }
278 277
279 int uni_utf8_to_decomposed_titlecase(const void *_input, size_t max_len, 278 int uni_utf8_to_decomposed_titlecase(const void *_input, size_t max_len,
280 buffer_t *output) 279 buffer_t *output)
281 { 280 {