Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib-charset/charset-iconv.c @ 6131:5f56b2eb32b3 HEAD
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
comparing.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 17:27:02 +0300 |
parents | 0d3583b02a32 |
children | d01522d276f6 |
rev | line source |
---|---|
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
1 /* Copyright (C) 2002-2007 Timo Sirainen */ |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
4 #include "buffer.h" |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
5 #include "unichar.h" |
579
e524da896d92
Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
6 #include "charset-utf8.h" |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
7 |
1300 | 8 #ifdef HAVE_ICONV |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
9 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
10 #include <iconv.h> |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
11 #include <ctype.h> |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
12 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
/* State of one charset -> UTF-8 translation, created by
   charset_to_utf8_begin() and released by charset_to_utf8_end(). */
struct charset_translation {
	/* iconv conversion descriptor. (iconv_t)-1 means the source charset
	   is already UTF-8 and iconv isn't used at all. */
	iconv_t cd;
	/* Copied from the ucase argument of charset_to_utf8_begin(). When
	   set, the UTF-8 output is passed through
	   uni_utf8_to_decomposed_titlecase() before it's written. */
	unsigned int ucase:1;
};
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
17 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
18 struct charset_translation * |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
19 charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
20 { |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
21 struct charset_translation *t; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
22 iconv_t cd; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
23 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
24 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
25 *unknown_charset_r = FALSE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
26 |
5502 | 27 if (charset_is_utf8(charset)) |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
28 cd = (iconv_t)-1; |
5502 | 29 else { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
30 cd = iconv_open("UTF-8", charset); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
31 if (cd == (iconv_t)-1) { |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
32 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
33 *unknown_charset_r = TRUE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
34 return NULL; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
35 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
36 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
37 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
38 t = i_new(struct charset_translation, 1); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
39 t->cd = cd; |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
40 t->ucase = ucase; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
41 return t; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
42 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
43 |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
44 void charset_to_utf8_end(struct charset_translation **_t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
45 { |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
46 struct charset_translation *t = *_t; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
47 |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
48 *_t = NULL; |
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
49 |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
50 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
51 iconv_close(t->cd); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
52 i_free(t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
53 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
54 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
55 void charset_to_utf8_reset(struct charset_translation *t) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
56 { |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
57 if (t->cd != (iconv_t)-1) |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
58 (void)iconv(t->cd, NULL, NULL, NULL, NULL); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
59 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
60 |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
61 static bool |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
62 charset_to_utf8_try(struct charset_translation *t, |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
63 const unsigned char *src, size_t *src_size, buffer_t *dest, |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
64 enum charset_result *result) |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
65 { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
66 ICONV_CONST char *ic_srcbuf; |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
67 char tmpbuf[8192], *ic_destbuf; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
68 size_t srcleft, destleft; |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
69 bool ret = TRUE; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
70 |
1991
689f791b480f
iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents:
1471
diff
changeset
|
71 if (t->cd == (iconv_t)-1) { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
72 /* no translation needed - just copy it to outbuf uppercased */ |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
73 *result = CHARSET_RET_OK; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
74 if (!t->ucase) { |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
75 buffer_append(dest, src, *src_size); |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
76 return TRUE; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
77 } |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
78 |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
79 if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0) |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
80 *result = CHARSET_RET_INVALID_INPUT; |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
81 return TRUE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
82 } |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
83 if (!t->ucase) { |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
84 destleft = buffer_get_size(dest) - dest->used; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
85 if (destleft < *src_size) { |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
86 /* The buffer is most likely too small to hold the |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
87 output, so increase it at least to the input size. */ |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
88 destleft = *src_size; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
89 } |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
90 ic_destbuf = buffer_append_space_unsafe(dest, destleft); |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
91 } else { |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
92 destleft = sizeof(tmpbuf); |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
93 ic_destbuf = tmpbuf; |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
94 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
95 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
96 srcleft = *src_size; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
97 ic_srcbuf = (ICONV_CONST char *) src; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
98 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
99 if (iconv(t->cd, &ic_srcbuf, &srcleft, |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
100 &ic_destbuf, &destleft) != (size_t)-1) |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
101 *result = CHARSET_RET_OK; |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
102 else if (errno == E2BIG) { |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
103 /* set result just to avoid compiler warning */ |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
104 *result = CHARSET_RET_INCOMPLETE_INPUT; |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
105 ret = FALSE; |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
106 } else if (errno == EINVAL) |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
107 *result = CHARSET_RET_INCOMPLETE_INPUT; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
108 else { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
109 /* should be EILSEQ */ |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
110 *result = CHARSET_RET_INVALID_INPUT; |
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
111 return TRUE; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
112 } |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
113 *src_size -= srcleft; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
114 |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
115 if (!t->ucase) { |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
116 /* give back the memory we didn't use */ |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
117 buffer_set_used_size(dest, dest->used - destleft); |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
118 } else { |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
119 size_t tmpsize = sizeof(tmpbuf) - destleft; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
120 |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
121 /* we just converted data to UTF-8, it can't be invalid */ |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
122 if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0) |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
123 i_unreached(); |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
5502
diff
changeset
|
124 } |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
125 return ret; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
126 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
127 |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
128 enum charset_result |
6122
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
129 charset_to_utf8(struct charset_translation *t, |
d86581f4a0c6
charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents:
6114
diff
changeset
|
130 const unsigned char *src, size_t *src_size, buffer_t *dest) |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
131 { |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
132 enum charset_result result; |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
133 size_t pos, used, size; |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
134 bool ret; |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
135 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
136 for (pos = 0;;) { |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
137 size = *src_size - pos; |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
138 ret = charset_to_utf8_try(t, src + pos, &size, dest, &result); |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
139 pos += size; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
140 |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
141 if (ret) { |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
142 *src_size = pos; |
6125
b9c1336fd4e4
Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents:
6122
diff
changeset
|
143 return result; |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
144 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
145 |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
146 /* force buffer to grow */ |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
147 used = dest->used; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
148 size = buffer_get_size(dest) - used + 1; |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
149 (void)buffer_append_space_unsafe(dest, size); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
150 buffer_set_used_size(dest, used); |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
151 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
152 } |
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
153 |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
154 #endif |