annotate src/lib-charset/charset-utf8.c @ 766:03832c7f389b HEAD

Compiles again without iconv()
author Timo Sirainen <tss@iki.fi>
date Mon, 09 Dec 2002 15:39:32 +0200
parents 3521edb6c240
children d96cbba73a8b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "charset-utf8.h"
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
7 #include <ctype.h>
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
8
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
9 void _charset_utf8_ucase(const unsigned char *src, size_t src_size,
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
10 Buffer *dest, size_t destpos)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
11 {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
12 char *destbuf;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
13 size_t i;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
14
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
15 destbuf = buffer_get_space(dest, destpos, src_size);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
16 for (i = 0; i < src_size; i++)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
17 destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
18 }
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
19
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
20 const char *_charset_utf8_ucase_strdup(const Buffer *data, size_t *utf8_size)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
21 {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
22 const char *buf;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
23 size_t size;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
24 Buffer *dest;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
25
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
26 buf = buffer_get_data(data, &size);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
27
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
28 dest = buffer_create_dynamic(data_stack_pool, size, (size_t)-1);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
29 _charset_utf8_ucase(buf, size, dest, 0);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
30 if (utf8_size != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
31 *utf8_size = buffer_get_used_size(dest);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
32 buffer_append_c(dest, '\0');
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
33 return buffer_free_without_data(dest);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
34 }
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
35
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
36
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
37 #ifndef HAVE_ICONV_H
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
38
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
39 #include <ctype.h>
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
40
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
41 struct _CharsetTranslation {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
42 int dummy;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
43 };
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
44
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
45 static CharsetTranslation ascii_translation, utf8_translation;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
46
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
47 CharsetTranslation *charset_to_utf8_begin(const char *charset,
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
48 int *unknown_charset)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
49 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
50 if (unknown_charset != NULL)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51 *unknown_charset = FALSE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
52
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
53 if (strcasecmp(charset, "us-ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
54 strcasecmp(charset, "ascii") == 0)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
55 return &ascii_translation;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
56
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
57 if (strcasecmp(charset, "UTF-8") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
58 strcasecmp(charset, "UTF8") == 0)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 return &utf8_translation;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
60
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
61 /* no support for charsets that need translation */
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
62 if (unknown_charset != NULL)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
63 *unknown_charset = TRUE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
64 return NULL;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
65 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
66
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
67 void charset_to_utf8_end(CharsetTranslation *t __attr_unused__)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
68 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
69 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
70
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
71 void charset_to_utf8_reset(CharsetTranslation *t __attr_unused__)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
72 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
73 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
74
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
75 CharsetResult
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
76 charset_to_ucase_utf8(CharsetTranslation *t __attr_unused__,
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
77 const Buffer *src, size_t *src_pos, Buffer *dest)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
78 {
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
79 size_t size, destpos, destleft;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
80
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
81 destpos = buffer_get_used_size(dest);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
82 destleft = buffer_get_size(dest) - destpos;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
83
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
84 /* no translation needed - just copy it to outbuf uppercased */
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
85 size = buffer_get_used_size(src);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
86 if (size > destleft)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
87 size = destleft;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
88 _charset_utf8_ucase(buffer_get_data(src, NULL), size, dest, destpos);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
89 if (src_pos != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
90 *src_pos = size;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
91 return CHARSET_RET_OK;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
92 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
93
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
94 const char *
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
95 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
96 const Buffer *data, size_t *utf8_size)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
97 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
98 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
99 strcasecmp(charset, "ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
100 strcasecmp(charset, "UTF-8") == 0 ||
753
3521edb6c240 charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
101 strcasecmp(charset, "UTF8") == 0) {
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
102 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
103 *unknown_charset = FALSE;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
104 return _charset_utf8_ucase_strdup(data, utf8_size);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
105 } else {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
106 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
107 *unknown_charset = TRUE;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
108 return NULL;
753
3521edb6c240 charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
109 }
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
110 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
111
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
112 #endif