annotate src/lib-charset/charset-iconv.c @ 6131:5f56b2eb32b3 HEAD

Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8 comparing.
author Timo Sirainen <tss@iki.fi>
date Fri, 20 Jul 2007 17:27:02 +0300
parents 0d3583b02a32
children d01522d276f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
1 /* Copyright (C) 2002-2007 Timo Sirainen */
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
5 #include "unichar.h"
579
e524da896d92 Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
6 #include "charset-utf8.h"
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
7
1300
952bf533c2ea Better iconv() checking.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
8 #ifdef HAVE_ICONV
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
10 #include <iconv.h>
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
11 #include <ctype.h>
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
12
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* State of one charset -> UTF-8 translation, created by
   charset_to_utf8_begin() and released by charset_to_utf8_end(). */
struct charset_translation {
	/* iconv conversion descriptor. Holds (iconv_t)-1 when the source
	   charset is already UTF-8 and iconv() is bypassed entirely. */
	iconv_t cd;
	/* Copied from the ucase argument of charset_to_utf8_begin(). When
	   set, output is passed through uni_utf8_to_decomposed_titlecase()
	   instead of being stored as-is. */
	unsigned int ucase:1;
};
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
17
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
18 struct charset_translation *
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
19 charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
20 {
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
21 struct charset_translation *t;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
22 iconv_t cd;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
23
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
24 if (unknown_charset_r != NULL)
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
25 *unknown_charset_r = FALSE;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
26
5502
212bbdc55065 Cleanup
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
27 if (charset_is_utf8(charset))
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
28 cd = (iconv_t)-1;
5502
212bbdc55065 Cleanup
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
29 else {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
30 cd = iconv_open("UTF-8", charset);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
31 if (cd == (iconv_t)-1) {
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
32 if (unknown_charset_r != NULL)
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
33 *unknown_charset_r = TRUE;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
34 return NULL;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
35 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
36 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
37
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
38 t = i_new(struct charset_translation, 1);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
39 t->cd = cd;
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
40 t->ucase = ucase;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
41 return t;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
42 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
43
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
44 void charset_to_utf8_end(struct charset_translation **_t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
45 {
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
46 struct charset_translation *t = *_t;
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
47
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
48 *_t = NULL;
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
49
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
50 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
51 iconv_close(t->cd);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
52 i_free(t);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
53 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
54
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
55 void charset_to_utf8_reset(struct charset_translation *t)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
56 {
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
57 if (t->cd != (iconv_t)-1)
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
58 (void)iconv(t->cd, NULL, NULL, NULL, NULL);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
59 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
60
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
61 static bool
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
62 charset_to_utf8_try(struct charset_translation *t,
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
63 const unsigned char *src, size_t *src_size, buffer_t *dest,
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
64 enum charset_result *result)
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
65 {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
66 ICONV_CONST char *ic_srcbuf;
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
67 char tmpbuf[8192], *ic_destbuf;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
68 size_t srcleft, destleft;
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
69 bool ret = TRUE;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
70
1991
689f791b480f iconv_t isn't necessarily pointer.
Timo Sirainen <tss@iki.fi>
parents: 1471
diff changeset
71 if (t->cd == (iconv_t)-1) {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
72 /* no translation needed - just copy it to outbuf uppercased */
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
73 *result = CHARSET_RET_OK;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
74 if (!t->ucase) {
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
75 buffer_append(dest, src, *src_size);
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
76 return TRUE;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
77 }
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
78
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
79 if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
80 *result = CHARSET_RET_INVALID_INPUT;
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
81 return TRUE;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
82 }
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
83 if (!t->ucase) {
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
84 destleft = buffer_get_size(dest) - dest->used;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
85 if (destleft < *src_size) {
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
86 /* The buffer is most likely too small to hold the
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
87 output, so increase it at least to the input size. */
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
88 destleft = *src_size;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
89 }
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
90 ic_destbuf = buffer_append_space_unsafe(dest, destleft);
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
91 } else {
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
92 destleft = sizeof(tmpbuf);
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
93 ic_destbuf = tmpbuf;
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
94 }
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
95
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
96 srcleft = *src_size;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
97 ic_srcbuf = (ICONV_CONST char *) src;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
98
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
99 if (iconv(t->cd, &ic_srcbuf, &srcleft,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
100 &ic_destbuf, &destleft) != (size_t)-1)
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
101 *result = CHARSET_RET_OK;
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
102 else if (errno == E2BIG) {
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
103 /* set result just to avoid compiler warning */
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
104 *result = CHARSET_RET_INCOMPLETE_INPUT;
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
105 ret = FALSE;
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
106 } else if (errno == EINVAL)
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
107 *result = CHARSET_RET_INCOMPLETE_INPUT;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
108 else {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
109 /* should be EILSEQ */
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
110 *result = CHARSET_RET_INVALID_INPUT;
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
111 return TRUE;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
112 }
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
113 *src_size -= srcleft;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
114
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
115 if (!t->ucase) {
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
116 /* give back the memory we didn't use */
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
117 buffer_set_used_size(dest, dest->used - destleft);
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
118 } else {
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
119 size_t tmpsize = sizeof(tmpbuf) - destleft;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
120
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
121 /* we just converted data to UTF-8, it can't be invalid */
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
122 if (uni_utf8_to_decomposed_titlecase(tmpbuf, tmpsize, dest) < 0)
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
123 i_unreached();
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 5502
diff changeset
124 }
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
125 return ret;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
126 }
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
127
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
128 enum charset_result
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
129 charset_to_utf8(struct charset_translation *t,
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
130 const unsigned char *src, size_t *src_size, buffer_t *dest)
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
131 {
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
132 enum charset_result result;
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
133 size_t pos, used, size;
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
134 bool ret;
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
135
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
136 for (pos = 0;;) {
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
137 size = *src_size - pos;
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
138 ret = charset_to_utf8_try(t, src + pos, &size, dest, &result);
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
139 pos += size;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
140
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
141 if (ret) {
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
142 *src_size = pos;
6125
b9c1336fd4e4 Removed CHARSET_RET_OUTPUT_FULL, it can't happen anymore.
Timo Sirainen <tss@iki.fi>
parents: 6122
diff changeset
143 return result;
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
144 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
145
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
146 /* force buffer to grow */
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
147 used = dest->used;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
148 size = buffer_get_size(dest) - used + 1;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
149 (void)buffer_append_space_unsafe(dest, size);
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
150 buffer_set_used_size(dest, used);
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
151 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
152 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
153
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
154 #endif