Mercurial > dovecot > core-2.2
annotate src/lib-charset/charset-utf8.c @ 961:41b6754c2e35 HEAD
Didn't compile without iconv.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 13 Jan 2003 22:56:44 +0200 |
parents | fd8888f6f037 |
children | 952bf533c2ea |
rev | line source |
---|---|
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 /* Copyright (C) 2002 Timo Sirainen */ |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
766 | 4 #include "buffer.h" |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
5 #include "charset-utf8.h" |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
6 |
766 | 7 #include <ctype.h> |
8 | |
9 void _charset_utf8_ucase(const unsigned char *src, size_t src_size, | |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
10 buffer_t *dest, size_t destpos) |
766 | 11 { |
12 char *destbuf; | |
13 size_t i; | |
14 | |
15 destbuf = buffer_get_space(dest, destpos, src_size); | |
16 for (i = 0; i < src_size; i++) | |
17 destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */ | |
18 } | |
19 | |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
20 const char *_charset_utf8_ucase_strdup(const unsigned char *data, size_t size, |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
21 size_t *utf8_size_r) |
766 | 22 { |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
23 buffer_t *dest; |
766 | 24 |
25 dest = buffer_create_dynamic(data_stack_pool, size, (size_t)-1); | |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
26 _charset_utf8_ucase(data, size, dest, 0); |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
27 if (utf8_size_r != NULL) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
28 *utf8_size_r = buffer_get_used_size(dest); |
766 | 29 buffer_append_c(dest, '\0'); |
30 return buffer_free_without_data(dest); | |
31 } | |
32 | |
33 | |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
34 #ifndef HAVE_ICONV_H |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
35 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
36 #include <ctype.h> |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
37 |
/* Translation state for builds without iconv. Nothing is ever translated
   here, so the struct carries only a placeholder member. */
struct charset_translation {
	int dummy;
};

/* Static instances whose addresses serve as the translation handles for
   the ASCII and UTF-8 charsets. */
static struct charset_translation ascii_translation;
static struct charset_translation utf8_translation;
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
43 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
44 struct charset_translation *charset_to_utf8_begin(const char *charset, |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
45 int *unknown_charset) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
46 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
47 if (unknown_charset != NULL) |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
48 *unknown_charset = FALSE; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
49 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
50 if (strcasecmp(charset, "us-ascii") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
51 strcasecmp(charset, "ascii") == 0) |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
52 return &ascii_translation; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
53 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
54 if (strcasecmp(charset, "UTF-8") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
55 strcasecmp(charset, "UTF8") == 0) |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
56 return &utf8_translation; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
57 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
58 /* no support for charsets that need translation */ |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
59 if (unknown_charset != NULL) |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
60 *unknown_charset = TRUE; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
61 return NULL; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
62 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
63 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
64 void charset_to_utf8_end(struct charset_translation *t __attr_unused__) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
65 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
66 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
67 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
68 void charset_to_utf8_reset(struct charset_translation *t __attr_unused__) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
69 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
70 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
71 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
72 enum charset_result |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
73 charset_to_ucase_utf8(struct charset_translation *t __attr_unused__, |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
74 const unsigned char *src, size_t *src_size, |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
75 buffer_t *dest) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
76 { |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
77 size_t destpos, destleft; |
766 | 78 |
79 destpos = buffer_get_used_size(dest); | |
80 destleft = buffer_get_size(dest) - destpos; | |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
81 |
766 | 82 /* no translation needed - just copy it to outbuf uppercased */ |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
83 if (*src_size > destleft) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
84 *src_size = destleft; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
85 _charset_utf8_ucase(src, *src_size, dest, destpos); |
766 | 86 return CHARSET_RET_OK; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
87 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
88 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
89 const char * |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
90 charset_to_utf8_string(const char *charset, int *unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
91 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
92 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
93 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
94 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 || |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
95 strcasecmp(charset, "ascii") == 0 || |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
96 strcasecmp(charset, "UTF-8") == 0 || |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
97 strcasecmp(charset, "UTF8") == 0) { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
98 if (unknown_charset != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
99 *unknown_charset = FALSE; |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
100 if (utf8_size_r != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
101 *utf8_size_r = size; |
898
0d5be52d7131
Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents:
792
diff
changeset
|
102 return t_strndup(data, size); |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
103 } else { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
104 if (unknown_charset != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
105 *unknown_charset = TRUE; |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
106 return NULL; |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
107 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
108 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
109 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
110 const char * |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
111 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset, |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
112 const unsigned char *data, size_t size, |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
113 size_t *utf8_size_r) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
114 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
115 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
116 strcasecmp(charset, "ascii") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
117 strcasecmp(charset, "UTF-8") == 0 || |
753
3521edb6c240
charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
118 strcasecmp(charset, "UTF8") == 0) { |
766 | 119 if (unknown_charset != NULL) |
120 *unknown_charset = FALSE; | |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
121 return _charset_utf8_ucase_strdup(data, size, utf8_size_r); |
766 | 122 } else { |
123 if (unknown_charset != NULL) | |
124 *unknown_charset = TRUE; | |
125 return NULL; | |
753
3521edb6c240
charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
126 } |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
127 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
128 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
129 #endif |