annotate src/lib-charset/charset-iconv.c @ 3863:55df57c028d4 HEAD

Added "bool" type and changed all ints that were used as booleans to bool.
author Timo Sirainen <tss@iki.fi>
date Fri, 13 Jan 2006 22:25:57 +0200
parents 689f791b480f
children 928229f8b3e6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
579
e524da896d92 Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
5 #include "charset-utf8.h"
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
1300
952bf533c2ea Better iconv() checking.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
7 #ifdef HAVE_ICONV
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #include <iconv.h>
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
10 #include <ctype.h>
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
11
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* State of one charset -> UTF-8 translation, created by
   charset_to_utf8_begin() and released by charset_to_utf8_end(). */
struct charset_translation {
	/* iconv conversion descriptor, or (iconv_t)-1 when the input needs
	   no iconv conversion (ASCII or UTF-8 source) */
	iconv_t cd;
	/* TRUE when the source charset is plain ASCII */
	bool ascii;
};
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
16
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
17 struct charset_translation *charset_to_utf8_begin(const char *charset,
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
18 bool *unknown_charset)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
19 {
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
20 struct charset_translation *t;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
21 iconv_t cd;
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
22 bool ascii;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
23
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
24 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
25 *unknown_charset = FALSE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
26
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
27 if (strcasecmp(charset, "us-ascii") == 0 ||
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
28 strcasecmp(charset, "ascii") == 0) {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
29 cd = (iconv_t)-1;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
30 ascii = TRUE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
31 } else if (strcasecmp(charset, "UTF-8") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
32 strcasecmp(charset, "UTF8") == 0) {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
33 cd = (iconv_t)-1;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
34 ascii = FALSE;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
35 } else {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
36 ascii = FALSE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
37 cd = iconv_open("UTF-8", charset);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
38 if (cd == (iconv_t)-1) {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
39 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
40 *unknown_charset = TRUE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
41 return NULL;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
42 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
43 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
44
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
45 t = i_new(struct charset_translation, 1);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
46 t->cd = cd;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
47 t->ascii = ascii;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
48 return t;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
49 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
50
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
51 void charset_to_utf8_end(struct charset_translation *t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
52 {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
53 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
54 iconv_close(t->cd);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
55 i_free(t);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
56 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
57
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
58 void charset_to_utf8_reset(struct charset_translation *t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
59 {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
60 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
61 (void)iconv(t->cd, NULL, NULL, NULL, NULL);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
62 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
63
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
64 enum charset_result
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
65 charset_to_ucase_utf8(struct charset_translation *t,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
66 const unsigned char *src, size_t *src_size,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
67 buffer_t *dest)
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
68 {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
69 ICONV_CONST char *ic_srcbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
70 char *ic_destbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
71 size_t srcleft, destpos, destleft, size;
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
72 enum charset_result ret;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
73
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
74 destpos = buffer_get_used_size(dest);
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
75 destleft = buffer_get_size(dest) - destpos;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
76
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
77 if (t->cd == (iconv_t)-1) {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
78 /* no translation needed - just copy it to outbuf uppercased */
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
79 if (*src_size > destleft)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
80 *src_size = destleft;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
81 _charset_utf8_ucase(src, *src_size, dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
82 return CHARSET_RET_OK;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
83 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
84
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
85 size = destleft;
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
86 srcleft = *src_size;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
87 ic_srcbuf = (ICONV_CONST char *) src;
1471
8f56379c3917 Renamed buffer_*_space() to buffer_*_space_unsafe() and added several
Timo Sirainen <tss@iki.fi>
parents: 1300
diff changeset
88 ic_destbuf = buffer_append_space_unsafe(dest, destleft);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
89
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
90 if (iconv(t->cd, &ic_srcbuf, &srcleft,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
91 &ic_destbuf, &destleft) != (size_t)-1)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
92 ret = CHARSET_RET_OK;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
93 else if (errno == E2BIG)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
94 ret = CHARSET_RET_OUTPUT_FULL;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
95 else if (errno == EINVAL)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
96 ret = CHARSET_RET_INCOMPLETE_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
97 else {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
98 /* should be EILSEQ */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
99 return CHARSET_RET_INVALID_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
100 }
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
101 size -= destleft;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
102
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
103 /* give back the memory we didn't use */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
104 buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
105
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
106 *src_size -= srcleft;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
107 _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
108 dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
109 return ret;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
110 }
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
111
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
112 static const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
113 charset_to_utf8_string_int(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
114 const unsigned char *data, size_t size,
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
115 size_t *utf8_size_r, bool ucase)
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
116 {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
117 iconv_t cd;
611
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
118 ICONV_CONST char *inbuf;
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
119 char *outbuf, *outpos;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
120 size_t inleft, outleft, outsize, pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
121
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
122 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
123 strcasecmp(charset, "ascii") == 0 ||
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
124 strcasecmp(charset, "UTF-8") == 0 ||
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
125 strcasecmp(charset, "UTF8") == 0) {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
126 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
127 *unknown_charset = FALSE;
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
128
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
129 if (!ucase) {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
130 if (utf8_size_r != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
131 *utf8_size_r = size;
898
0d5be52d7131 Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
132 return t_strndup(data, size);
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
133 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
134
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
135 return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
136 }
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
137
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
138 cd = iconv_open("UTF-8", charset);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
139 if (cd == (iconv_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
140 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
141 *unknown_charset = TRUE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
142 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
143 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
144
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
145 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
146 *unknown_charset = FALSE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
147
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
148 inbuf = (ICONV_CONST char *) data;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
149 inleft = size;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
150
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
151 outsize = outleft = inleft * 2;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
152 outbuf = outpos = t_buffer_get(outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
153
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
154 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
155 if (errno != E2BIG) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
156 /* invalid data */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
158 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
159 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
160
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
161 /* output buffer too small, grow it */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
162 pos = outsize - outleft;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
163 outsize *= 2;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
164 outleft = outsize - pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
165
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
166 outbuf = t_buffer_reget(outbuf, outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
167 outpos = outbuf + pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
168 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
169
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
170 if (utf8_size_r != NULL)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
171 *utf8_size_r = (size_t) (outpos - outbuf);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
172 *outpos++ = '\0';
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
173 t_buffer_alloc((size_t) (outpos - outbuf));
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
174
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
175 if (ucase)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
176 str_ucase(outbuf); /* FIXME: utf8 */
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
177
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
178 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
179 return outbuf;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
180 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
181
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
182 const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
183 charset_to_utf8_string(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
184 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
185 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
186 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
187 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
188 data, size, utf8_size_r, FALSE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
189 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
190
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
191 const char *
3863
55df57c028d4 Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents: 1991
diff changeset
192 charset_to_ucase_utf8_string(const char *charset, bool *unknown_charset,
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
193 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
194 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
195 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
196 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
197 data, size, utf8_size_r, TRUE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
198 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
199
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
200 #endif