Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib-charset/charset-iconv.c @ 5502:212bbdc55065 HEAD
Cleanup
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 03 Apr 2007 13:13:10 +0300 |
parents | e6cb9f75b76a |
children | e5451501ff2f |
rev | line source |
---|---|
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 /* Copyright (C) 2002 Timo Sirainen */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
4 #include "buffer.h" |
579
e524da896d92
Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
5 #include "charset-utf8.h" |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
6 |
1300 | 7 #ifdef HAVE_ICONV |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
8 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
9 #include <iconv.h> |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
10 #include <ctype.h> |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
11 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
/* State of one "some charset -> UTF-8" conversion. */
struct charset_translation {
	/* iconv conversion descriptor. The value (iconv_t)-1 is used by the
	   functions in this file as a marker meaning "input is already
	   UTF-8, no iconv conversion is needed". */
	iconv_t cd;
};
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
15 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
16 struct charset_translation *charset_to_utf8_begin(const char *charset, |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
17 bool *unknown_charset) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
18 { |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
19 struct charset_translation *t; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
20 iconv_t cd; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
21 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
22 if (unknown_charset != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
23 *unknown_charset = FALSE; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
24 |
5502 | 25 if (charset_is_utf8(charset)) |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
26 cd = (iconv_t)-1; |
5502 | 27 else { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
28 cd = iconv_open("UTF-8", charset); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
29 if (cd == (iconv_t)-1) { |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
30 if (unknown_charset != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
31 *unknown_charset = TRUE; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
32 return NULL; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
33 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
34 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
35 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
36 t = i_new(struct charset_translation, 1); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
37 t->cd = cd; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
38 return t; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
39 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
40 |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
41 void charset_to_utf8_end(struct charset_translation **_t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
42 { |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
43 struct charset_translation *t = *_t; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
44 |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
45 *_t = NULL; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
46 |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
47 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
48 iconv_close(t->cd); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
49 i_free(t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
50 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
51 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
52 void charset_to_utf8_reset(struct charset_translation *t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
53 { |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
54 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
55 (void)iconv(t->cd, NULL, NULL, NULL, NULL); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
56 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
57 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
58 enum charset_result |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
59 charset_to_ucase_utf8(struct charset_translation *t, |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
60 const unsigned char *src, size_t *src_size, |
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
61 buffer_t *dest) |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
62 { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
63 ICONV_CONST char *ic_srcbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
64 char *ic_destbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
65 size_t srcleft, destpos, destleft, size; |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
66 enum charset_result ret; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
67 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
68 destpos = buffer_get_used_size(dest); |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
69 destleft = buffer_get_size(dest) - destpos; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
70 |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
71 if (t->cd == (iconv_t)-1) { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
72 /* no translation needed - just copy it to outbuf uppercased */ |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
73 if (*src_size > destleft) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
74 *src_size = destleft; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
75 _charset_utf8_ucase(src, *src_size, dest, destpos); |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
76 return CHARSET_RET_OK; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
77 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
78 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
79 size = destleft; |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
80 srcleft = *src_size; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
81 ic_srcbuf = (ICONV_CONST char *) src; |
1471
8f56379c3917
Renamed buffer_*_space() to buffer_*_space_unsafe() and added several
Timo Sirainen <tss@iki.fi>
parents:
1300
diff
changeset
|
82 ic_destbuf = buffer_append_space_unsafe(dest, destleft); |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
83 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
84 if (iconv(t->cd, &ic_srcbuf, &srcleft, |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
85 &ic_destbuf, &destleft) != (size_t)-1) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
86 ret = CHARSET_RET_OK; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
87 else if (errno == E2BIG) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
88 ret = CHARSET_RET_OUTPUT_FULL; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
89 else if (errno == EINVAL) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
90 ret = CHARSET_RET_INCOMPLETE_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
91 else { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
92 /* should be EILSEQ */ |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
93 return CHARSET_RET_INVALID_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
94 } |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
95 size -= destleft; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
96 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
97 /* give back the memory we didn't use */ |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
98 buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
99 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
100 *src_size -= srcleft; |
766 | 101 _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size, |
102 dest, destpos); | |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
103 return ret; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
104 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
105 |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
106 enum charset_result |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
107 charset_to_ucase_utf8_full(struct charset_translation *t, |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
108 const unsigned char *src, size_t *src_size, |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
109 buffer_t *dest) |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
110 { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
111 enum charset_result ret; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
112 size_t pos, used, size; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
113 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
114 for (pos = 0;;) { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
115 size = *src_size - pos; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
116 ret = charset_to_ucase_utf8(t, src + pos, &size, dest); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
117 pos += size; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
118 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
119 if (ret != CHARSET_RET_OUTPUT_FULL) { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
120 *src_size = pos; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
121 return ret; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
122 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
123 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
124 /* force buffer to grow */ |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
125 used = dest->used; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
126 size = buffer_get_size(dest) - used + 1; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
127 (void)buffer_append_space_unsafe(dest, size); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
128 buffer_set_used_size(dest, used); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
129 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
130 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
131 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
132 static const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
133 charset_to_utf8_string_int(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
134 const unsigned char *data, size_t size, |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
135 size_t *utf8_size_r, bool ucase) |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
136 { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
137 iconv_t cd; |
611
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
138 ICONV_CONST char *inbuf; |
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
139 char *outbuf, *outpos; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
140 size_t inleft, outleft, outsize, pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
141 |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
142 if (charset == NULL || charset_is_utf8(charset)) { |
766 | 143 if (unknown_charset != NULL) |
144 *unknown_charset = FALSE; | |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
145 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
146 if (!ucase) { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
147 if (utf8_size_r != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
148 *utf8_size_r = size; |
898
0d5be52d7131
Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents:
792
diff
changeset
|
149 return t_strndup(data, size); |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
150 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
151 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
152 return _charset_utf8_ucase_strdup(data, size, utf8_size_r); |
766 | 153 } |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
154 |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
155 cd = iconv_open("UTF-8", charset); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
156 if (cd == (iconv_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
157 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
158 *unknown_charset = TRUE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
159 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
160 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
161 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
162 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
163 *unknown_charset = FALSE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
164 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
165 inbuf = (ICONV_CONST char *) data; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
166 inleft = size; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
167 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
168 outsize = outleft = inleft * 2; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
169 outbuf = outpos = t_buffer_get(outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
170 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
171 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
172 if (errno != E2BIG) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
173 /* invalid data */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
174 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
175 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
176 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
177 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
178 /* output buffer too small, grow it */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
179 pos = outsize - outleft; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
180 outsize *= 2; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
181 outleft = outsize - pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
182 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
183 outbuf = t_buffer_reget(outbuf, outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
184 outpos = outbuf + pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
185 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
186 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
187 if (utf8_size_r != NULL) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
188 *utf8_size_r = (size_t) (outpos - outbuf); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
189 *outpos++ = '\0'; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
190 t_buffer_alloc((size_t) (outpos - outbuf)); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
191 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
192 if (ucase) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
193 str_ucase(outbuf); /* FIXME: utf8 */ |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
194 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
195 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
196 return outbuf; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
197 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
198 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
199 const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
200 charset_to_utf8_string(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
201 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
202 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
203 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
204 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
205 data, size, utf8_size_r, FALSE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
206 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
207 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
208 const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
209 charset_to_ucase_utf8_string(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
210 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
211 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
212 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
213 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
214 data, size, utf8_size_r, TRUE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
215 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
216 |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
217 #endif |