Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib-charset/charset-iconv.c @ 6122:d86581f4a0c6 HEAD
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 16:22:59 +0300 |
parents | 325667778ae3 |
children | b9c1336fd4e4 |
rev | line source |
---|---|
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 /* Copyright (C) 2002 Timo Sirainen */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
4 #include "buffer.h" |
579
e524da896d92
Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
5 #include "charset-utf8.h" |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
6 |
1300 | 7 #ifdef HAVE_ICONV |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
8 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
9 #include <iconv.h> |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
10 #include <ctype.h> |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
11 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
12 struct charset_translation { |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
13 iconv_t cd; |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
14 unsigned int ucase:1; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
15 }; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
16 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
17 struct charset_translation * |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
18 charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
19 { |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
20 struct charset_translation *t; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
21 iconv_t cd; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
22 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
23 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
24 *unknown_charset_r = FALSE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
25 |
5502 | 26 if (charset_is_utf8(charset)) |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
27 cd = (iconv_t)-1; |
5502 | 28 else { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
29 cd = iconv_open("UTF-8", charset); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
30 if (cd == (iconv_t)-1) { |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
31 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
32 *unknown_charset_r = TRUE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
33 return NULL; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
34 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
35 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
36 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
37 t = i_new(struct charset_translation, 1); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
38 t->cd = cd; |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
39 t->ucase = ucase; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
40 return t; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
41 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
42 |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
43 void charset_to_utf8_end(struct charset_translation **_t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
44 { |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
45 struct charset_translation *t = *_t; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
46 |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
47 *_t = NULL; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
48 |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
49 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
50 iconv_close(t->cd); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
51 i_free(t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
52 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
53 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
54 void charset_to_utf8_reset(struct charset_translation *t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
55 { |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
56 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
57 (void)iconv(t->cd, NULL, NULL, NULL, NULL); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
58 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
59 |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
60 static enum charset_result |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
61 charset_to_utf8_try(struct charset_translation *t, |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
62 const unsigned char *src, size_t *src_size, buffer_t *dest) |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
63 { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
64 ICONV_CONST char *ic_srcbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
65 char *ic_destbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
66 size_t srcleft, destpos, destleft, size; |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
67 enum charset_result ret; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
68 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
69 destpos = dest->used; |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
70 if (t->cd == (iconv_t)-1) { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
71 /* no translation needed - just copy it to outbuf uppercased */ |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
72 if (t->ucase) |
6114
325667778ae3
_charset_utf8_ucase() -> charset_utf8_ucase_write(),
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
73 charset_utf8_ucase_write(dest, destpos, src, *src_size); |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
74 else |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
75 buffer_append(dest, src, *src_size); |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
76 return CHARSET_RET_OK; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
77 } |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
78 destleft = buffer_get_size(dest) - destpos; |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
79 if (destleft < *src_size) { |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
80 /* The buffer is most likely too small to hold the output, |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
81 so increase it at least to the input size. */ |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
82 destleft = *src_size; |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
83 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
84 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
85 size = destleft; |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
86 srcleft = *src_size; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
87 ic_srcbuf = (ICONV_CONST char *) src; |
1471
8f56379c3917
Renamed buffer_*_space() to buffer_*_space_unsafe() and added several
Timo Sirainen <tss@iki.fi>
parents:
1300
diff
changeset
|
88 ic_destbuf = buffer_append_space_unsafe(dest, destleft); |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
89 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
90 if (iconv(t->cd, &ic_srcbuf, &srcleft, |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
91 &ic_destbuf, &destleft) != (size_t)-1) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
92 ret = CHARSET_RET_OK; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
93 else if (errno == E2BIG) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
94 ret = CHARSET_RET_OUTPUT_FULL; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
95 else if (errno == EINVAL) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
96 ret = CHARSET_RET_INCOMPLETE_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
97 else { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
98 /* should be EILSEQ */ |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
99 return CHARSET_RET_INVALID_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
100 } |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
101 size -= destleft; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
102 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
103 /* give back the memory we didn't use */ |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
104 buffer_set_used_size(dest, dest->used - destleft); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
105 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
106 *src_size -= srcleft; |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
107 if (t->ucase) { |
6114
325667778ae3
_charset_utf8_ucase() -> charset_utf8_ucase_write(),
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
108 charset_utf8_ucase_write(dest, destpos, |
325667778ae3
_charset_utf8_ucase() -> charset_utf8_ucase_write(),
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
109 (unsigned char *)ic_destbuf - size, |
325667778ae3
_charset_utf8_ucase() -> charset_utf8_ucase_write(),
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
110 size); |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
111 } |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
112 return ret; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
113 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
114 |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
115 enum charset_result |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
116 charset_to_utf8(struct charset_translation *t, |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
117 const unsigned char *src, size_t *src_size, buffer_t *dest) |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
118 { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
119 enum charset_result ret; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
120 size_t pos, used, size; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
121 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
122 for (pos = 0;;) { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
123 size = *src_size - pos; |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
124 ret = charset_to_utf8_try(t, src + pos, &size, dest); |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
125 pos += size; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
126 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
127 if (ret != CHARSET_RET_OUTPUT_FULL) { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
128 *src_size = pos; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
129 return ret; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
130 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
131 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
132 /* force buffer to grow */ |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
133 used = dest->used; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
134 size = buffer_get_size(dest) - used + 1; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
135 (void)buffer_append_space_unsafe(dest, size); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
136 buffer_set_used_size(dest, used); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
137 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
138 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
139 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
140 static const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
141 charset_to_utf8_string_int(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
142 const unsigned char *data, size_t size, |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
143 size_t *utf8_size_r, bool ucase) |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
144 { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
145 iconv_t cd; |
611
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
146 ICONV_CONST char *inbuf; |
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
147 char *outbuf, *outpos; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
148 size_t inleft, outleft, outsize, pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
149 |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
150 if (charset == NULL || charset_is_utf8(charset)) { |
766 | 151 if (unknown_charset != NULL) |
152 *unknown_charset = FALSE; | |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
153 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
154 if (!ucase) { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
155 if (utf8_size_r != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
156 *utf8_size_r = size; |
898
0d5be52d7131
Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents:
792
diff
changeset
|
157 return t_strndup(data, size); |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
158 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
159 |
6114
325667778ae3
_charset_utf8_ucase() -> charset_utf8_ucase_write(),
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
160 return charset_utf8_ucase_strdup(data, size, utf8_size_r); |
766 | 161 } |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
162 |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
163 cd = iconv_open("UTF-8", charset); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
164 if (cd == (iconv_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
165 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
166 *unknown_charset = TRUE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
167 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
168 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
169 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
170 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
171 *unknown_charset = FALSE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
172 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
173 inbuf = (ICONV_CONST char *) data; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
174 inleft = size; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
175 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
176 outsize = outleft = inleft * 2; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
177 outbuf = outpos = t_buffer_get(outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
178 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
179 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
180 if (errno != E2BIG) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
181 /* invalid data */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
182 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
183 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
184 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
185 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
186 /* output buffer too small, grow it */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
187 pos = outsize - outleft; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
188 outsize *= 2; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
189 outleft = outsize - pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
190 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
191 outbuf = t_buffer_reget(outbuf, outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
192 outpos = outbuf + pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
193 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
194 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
195 if (utf8_size_r != NULL) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
196 *utf8_size_r = (size_t) (outpos - outbuf); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
197 *outpos++ = '\0'; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
198 t_buffer_alloc((size_t) (outpos - outbuf)); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
199 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
200 if (ucase) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
201 str_ucase(outbuf); /* FIXME: utf8 */ |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
202 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
203 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
204 return outbuf; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
205 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
206 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
207 const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
208 charset_to_utf8_string(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
209 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
210 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
211 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
212 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
213 data, size, utf8_size_r, FALSE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
214 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
215 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
216 const char * |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
1991
diff
changeset
|
217 charset_to_ucase_utf8_string(const char *charset, bool *unknown_charset, |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
218 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
219 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
220 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
221 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
222 data, size, utf8_size_r, TRUE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
223 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
224 |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
225 #endif |