Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib-charset/charset-iconv.c @ 898:0d5be52d7131 HEAD
Use unsigned char* when accessing non-NUL-terminated strings. Compiler
warnings will then flag accidentally passing them to functions which
require NUL-terminated strings. Changed a few functions to use void* to avoid
unneeded casting.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 04 Jan 2003 19:26:29 +0200 |
parents | d573c53946ac |
children | fd8888f6f037 |
rev | line source |
---|---|
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 /* Copyright (C) 2002 Timo Sirainen */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
4 #include "buffer.h" |
579
e524da896d92
Several minor fixes: signedness, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
5 #include "charset-utf8.h" |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
6 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
7 #ifdef HAVE_ICONV_H |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
8 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
9 #include <iconv.h> |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
10 #include <ctype.h> |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
11 |
611
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
/* Qualifier applied to the input-buffer pointer handed to iconv().
   It expands to `const` on Solaris (to avoid warnings with Solaris'
   iconv()) and to nothing everywhere else. */
#if defined(__sun__)
#  define ICONV_CONST const
#else
#  define ICONV_CONST
#endif
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
17 |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
/* State of one "some charset -> UTF-8" translation. */
struct _CharsetTranslation {
	/* iconv conversion descriptor; NULL when the source charset is
	   ASCII or UTF-8 and no iconv conversion is performed */
	iconv_t cd;
	/* TRUE when the source charset was given as "ascii"/"us-ascii" */
	int ascii;
};
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
22 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
23 CharsetTranslation *charset_to_utf8_begin(const char *charset, |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
24 int *unknown_charset) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
25 { |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
26 CharsetTranslation *t; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
27 iconv_t cd; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
28 int ascii; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
29 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
30 if (unknown_charset != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
31 *unknown_charset = FALSE; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
32 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
33 if (strcasecmp(charset, "us-ascii") == 0 || |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
34 strcasecmp(charset, "ascii") == 0) { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
35 cd = NULL; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
36 ascii = TRUE; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
37 } else if (strcasecmp(charset, "UTF-8") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
38 strcasecmp(charset, "UTF8") == 0) { |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
39 cd = NULL; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
40 ascii = FALSE; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
41 } else { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
42 ascii = FALSE; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
43 cd = iconv_open("UTF-8", charset); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
44 if (cd == (iconv_t)-1) { |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
45 if (unknown_charset != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
46 *unknown_charset = TRUE; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
47 return NULL; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
48 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
49 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
50 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
51 t = i_new(CharsetTranslation, 1); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
52 t->cd = cd; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
53 t->ascii = ascii; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
54 return t; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
55 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
56 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
57 void charset_to_utf8_end(CharsetTranslation *t) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
58 { |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
59 if (t->cd != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
60 iconv_close(t->cd); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
61 i_free(t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
62 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
63 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
64 void charset_to_utf8_reset(CharsetTranslation *t) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
65 { |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
66 if (t->cd != NULL) |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
67 (void)iconv(t->cd, NULL, NULL, NULL, NULL); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
68 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
69 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
70 CharsetResult |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
71 charset_to_ucase_utf8(CharsetTranslation *t, |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
72 const unsigned char *src, size_t *src_size, Buffer *dest) |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
73 { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
74 ICONV_CONST char *ic_srcbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
75 char *ic_destbuf; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
76 size_t srcleft, destpos, destleft, size; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
77 CharsetResult ret; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
78 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
79 destpos = buffer_get_used_size(dest); |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
80 destleft = buffer_get_size(dest) - destpos; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
81 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
82 if (t->cd == NULL) { |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
83 /* no translation needed - just copy it to outbuf uppercased */ |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
84 if (*src_size > destleft) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
85 *src_size = destleft; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
86 _charset_utf8_ucase(src, *src_size, dest, destpos); |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
87 return CHARSET_RET_OK; |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
88 } |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
89 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
90 size = destleft; |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
91 srcleft = *src_size; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
92 ic_srcbuf = (ICONV_CONST char *) src; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
93 ic_destbuf = buffer_append_space(dest, destleft); |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
94 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
95 if (iconv(t->cd, &ic_srcbuf, &srcleft, |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
96 &ic_destbuf, &destleft) != (size_t)-1) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
97 ret = CHARSET_RET_OK; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
98 else if (errno == E2BIG) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
99 ret = CHARSET_RET_OUTPUT_FULL; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
100 else if (errno == EINVAL) |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
101 ret = CHARSET_RET_INCOMPLETE_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
102 else { |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
103 /* should be EILSEQ */ |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
104 return CHARSET_RET_INVALID_INPUT; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
105 } |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
106 size -= destleft; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
107 |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
108 /* give back the memory we didn't use */ |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
109 buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft); |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
110 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
111 *src_size -= srcleft; |
766 | 112 _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size, |
113 dest, destpos); | |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
114 return ret; |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
115 } |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
116 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
117 static const char * |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
118 charset_to_utf8_string_int(const char *charset, int *unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
119 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
120 size_t *utf8_size_r, int ucase) |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
121 { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
122 iconv_t cd; |
611
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
123 ICONV_CONST char *inbuf; |
9373933b1be1
Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents:
609
diff
changeset
|
124 char *outbuf, *outpos; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
125 size_t inleft, outleft, outsize, pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
126 |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
579
diff
changeset
|
127 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 || |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
128 strcasecmp(charset, "ascii") == 0 || |
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
129 strcasecmp(charset, "UTF-8") == 0 || |
766 | 130 strcasecmp(charset, "UTF8") == 0) { |
131 if (unknown_charset != NULL) | |
132 *unknown_charset = FALSE; | |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
133 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
134 if (!ucase) { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
135 if (utf8_size_r != NULL) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
136 *utf8_size_r = size; |
898
0d5be52d7131
Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents:
792
diff
changeset
|
137 return t_strndup(data, size); |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
138 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
139 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
140 return _charset_utf8_ucase_strdup(data, size, utf8_size_r); |
766 | 141 } |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
142 |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
608
diff
changeset
|
143 cd = iconv_open("UTF-8", charset); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
144 if (cd == (iconv_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
145 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
146 *unknown_charset = TRUE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
147 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
148 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
149 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
150 if (unknown_charset != NULL) |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
151 *unknown_charset = FALSE; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
152 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
153 inbuf = (ICONV_CONST char *) data; |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
154 inleft = size; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
155 |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
156 outsize = outleft = inleft * 2; |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
157 outbuf = outpos = t_buffer_get(outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
158 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
159 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
160 if (errno != E2BIG) { |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
161 /* invalid data */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
162 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
163 return NULL; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
164 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
165 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
166 /* output buffer too small, grow it */ |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
167 pos = outsize - outleft; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
168 outsize *= 2; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
169 outleft = outsize - pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
170 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
171 outbuf = t_buffer_reget(outbuf, outsize + 1); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
172 outpos = outbuf + pos; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
173 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
174 |
785
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
175 if (utf8_size_r != NULL) |
d96cbba73a8b
Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents:
766
diff
changeset
|
176 *utf8_size_r = (size_t) (outpos - outbuf); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
177 *outpos++ = '\0'; |
765
553f050c8313
Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents:
753
diff
changeset
|
178 t_buffer_alloc((size_t) (outpos - outbuf)); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
179 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
180 if (ucase) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
181 str_ucase(outbuf); /* FIXME: utf8 */ |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
182 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
183 iconv_close(cd); |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
184 return outbuf; |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
185 } |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
186 |
792
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
187 const char * |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
188 charset_to_utf8_string(const char *charset, int *unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
189 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
190 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
191 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
192 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
193 data, size, utf8_size_r, FALSE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
194 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
195 |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
196 const char * |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
197 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
198 const unsigned char *data, size_t size, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
199 size_t *utf8_size_r) |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
200 { |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
201 return charset_to_utf8_string_int(charset, unknown_charset, |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
202 data, size, utf8_size_r, TRUE); |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
203 } |
d573c53946ac
Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents:
785
diff
changeset
|
204 |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
205 #endif |