annotate src/lib-charset/charset-iconv.c @ 5502:212bbdc55065 HEAD

Cleanup
author Timo Sirainen <tss@iki.fi>
date Tue, 03 Apr 2007 13:13:10 +0300
parents e6cb9f75b76a
children e5451501ff2f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
579
e524da896d92 Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
5 #include "charset-utf8.h"
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
1300
952bf533c2ea Better iconv() checking.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
7 #ifdef HAVE_ICONV
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #include <iconv.h>
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
10 #include <ctype.h>
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
11
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* State for one "some charset -> UTF-8" translation. */
struct charset_translation {
	/* iconv conversion descriptor. The functions in this file store
	   (iconv_t)-1 here to mean "input is already UTF-8, no iconv
	   conversion is needed". */
	iconv_t cd;
};
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
15
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
16 struct charset_translation *charset_to_utf8_begin(const char *charset,
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
17 bool *unknown_charset)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
18 {
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
19 struct charset_translation *t;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
20 iconv_t cd;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
21
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
22 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
23 *unknown_charset = FALSE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
24
5502
212bbdc55065 Cleanup
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
25 if (charset_is_utf8(charset))
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
26 cd = (iconv_t)-1;
5502
212bbdc55065 Cleanup
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
27 else {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
28 cd = iconv_open("UTF-8", charset);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
29 if (cd == (iconv_t)-1) {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
30 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
31 *unknown_charset = TRUE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
32 return NULL;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
33 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
34 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
35
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
36 t = i_new(struct charset_translation, 1);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
37 t->cd = cd;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
38 return t;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
39 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
40
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
41 void charset_to_utf8_end(struct charset_translation **_t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
42 {
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
43 struct charset_translation *t = *_t;
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
44
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
45 *_t = NULL;
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
46
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
47 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
48 iconv_close(t->cd);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
49 i_free(t);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
50 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
51
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
52 void charset_to_utf8_reset(struct charset_translation *t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
53 {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
54 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
55 (void)iconv(t->cd, NULL, NULL, NULL, NULL);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
56 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
57
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
58 enum charset_result
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
59 charset_to_ucase_utf8(struct charset_translation *t,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
60 const unsigned char *src, size_t *src_size,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
61 buffer_t *dest)
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
62 {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
63 ICONV_CONST char *ic_srcbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
64 char *ic_destbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
65 size_t srcleft, destpos, destleft, size;
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
66 enum charset_result ret;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
67
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
68 destpos = buffer_get_used_size(dest);
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
69 destleft = buffer_get_size(dest) - destpos;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
70
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
71 if (t->cd == (iconv_t)-1) {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
72 /* no translation needed - just copy it to outbuf uppercased */
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
73 if (*src_size > destleft)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
74 *src_size = destleft;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
75 _charset_utf8_ucase(src, *src_size, dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
76 return CHARSET_RET_OK;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
77 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
78
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
79 size = destleft;
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
80 srcleft = *src_size;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
81 ic_srcbuf = (ICONV_CONST char *) src;
1471
8f56379c3917 Renamed buffer_*_space() to buffer_*_space_unsafe() and added several
Timo Sirainen <tss@iki.fi>
parents: 1300
diff changeset
82 ic_destbuf = buffer_append_space_unsafe(dest, destleft);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
83
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
84 if (iconv(t->cd, &ic_srcbuf, &srcleft,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
85 &ic_destbuf, &destleft) != (size_t)-1)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
86 ret = CHARSET_RET_OK;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
87 else if (errno == E2BIG)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
88 ret = CHARSET_RET_OUTPUT_FULL;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
89 else if (errno == EINVAL)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
90 ret = CHARSET_RET_INCOMPLETE_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
91 else {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
92 /* should be EILSEQ */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
93 return CHARSET_RET_INVALID_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
94 }
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
95 size -= destleft;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
96
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
97 /* give back the memory we didn't use */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
98 buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
99
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
100 *src_size -= srcleft;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
101 _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
102 dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
103 return ret;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
104 }
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
105
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
106 enum charset_result
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
107 charset_to_ucase_utf8_full(struct charset_translation *t,
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
108 const unsigned char *src, size_t *src_size,
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
109 buffer_t *dest)
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
110 {
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
111 enum charset_result ret;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
112 size_t pos, used, size;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
113
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
114 for (pos = 0;;) {
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
115 size = *src_size - pos;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
116 ret = charset_to_ucase_utf8(t, src + pos, &size, dest);
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
117 pos += size;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
118
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
119 if (ret != CHARSET_RET_OUTPUT_FULL) {
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
120 *src_size = pos;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
121 return ret;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
122 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
123
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
124 /* force buffer to grow */
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
125 used = dest->used;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
126 size = buffer_get_size(dest) - used + 1;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
127 (void)buffer_append_space_unsafe(dest, size);
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
128 buffer_set_used_size(dest, used);
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
129 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
130 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
131
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
132 static const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
133 charset_to_utf8_string_int(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
134 const unsigned char *data, size_t size,
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
135 size_t *utf8_size_r, bool ucase)
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
136 {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
137 iconv_t cd;
611
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
138 ICONV_CONST char *inbuf;
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
139 char *outbuf, *outpos;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
140 size_t inleft, outleft, outsize, pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
141
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
142 if (charset == NULL || charset_is_utf8(charset)) {
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
143 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
144 *unknown_charset = FALSE;
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
145
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
146 if (!ucase) {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
147 if (utf8_size_r != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
148 *utf8_size_r = size;
898
0d5be52d7131 Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
149 return t_strndup(data, size);
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
150 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
151
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
152 return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
153 }
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
154
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
155 cd = iconv_open("UTF-8", charset);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
156 if (cd == (iconv_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
158 *unknown_charset = TRUE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
159 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
160 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
161
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
162 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
163 *unknown_charset = FALSE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
164
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
165 inbuf = (ICONV_CONST char *) data;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
166 inleft = size;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
167
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
168 outsize = outleft = inleft * 2;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
169 outbuf = outpos = t_buffer_get(outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
170
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
171 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
172 if (errno != E2BIG) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
173 /* invalid data */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
174 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
175 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
176 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
177
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
178 /* output buffer too small, grow it */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
179 pos = outsize - outleft;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
180 outsize *= 2;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
181 outleft = outsize - pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
182
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
183 outbuf = t_buffer_reget(outbuf, outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
184 outpos = outbuf + pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
185 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
186
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
187 if (utf8_size_r != NULL)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
188 *utf8_size_r = (size_t) (outpos - outbuf);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
189 *outpos++ = '\0';
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
190 t_buffer_alloc((size_t) (outpos - outbuf));
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
191
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
192 if (ucase)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
193 str_ucase(outbuf); /* FIXME: utf8 */
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
194
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
195 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
196 return outbuf;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
197 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
198
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
199 const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
200 charset_to_utf8_string(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
201 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
202 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
203 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
204 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
205 data, size, utf8_size_r, FALSE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
206 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
207
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
208 const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
209 charset_to_ucase_utf8_string(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
210 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
211 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
212 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
213 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
214 data, size, utf8_size_r, TRUE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
215 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
216
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
217 #endif