annotate src/lib-charset/charset-iconv.c @ 898:0d5be52d7131 HEAD

Use unsigned char* when accessing non-NUL terminating strings. Compiler warnings would then notify about accidentally passing them to functions which require them NUL-terminated. Changed a few functions to use void* to avoid unneeded casting.
author Timo Sirainen <tss@iki.fi>
date Sat, 04 Jan 2003 19:26:29 +0200
parents d573c53946ac
children fd8888f6f037
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
579
e524da896d92 Several minor fixes: signess, casting away const, missing static, etc.
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
5 #include "charset-utf8.h"
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
7 #ifdef HAVE_ICONV_H
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #include <iconv.h>
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
10 #include <ctype.h>
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
11
611
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
12 #ifdef __sun__
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
13 # define ICONV_CONST const
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
14 #else
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
15 # define ICONV_CONST
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
16 #endif
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
17
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
/* State of one "charset -> UTF-8" translation. */
struct _CharsetTranslation {
	/* iconv descriptor doing the conversion; NULL when the source
	   charset needs no conversion (ASCII or UTF-8 input) */
	iconv_t cd;
	/* TRUE when the source charset was given as "us-ascii"/"ascii" */
	int ascii;
};
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
22
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
23 CharsetTranslation *charset_to_utf8_begin(const char *charset,
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
24 int *unknown_charset)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
25 {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
26 CharsetTranslation *t;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
27 iconv_t cd;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
28 int ascii;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
29
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
30 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
31 *unknown_charset = FALSE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
32
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
33 if (strcasecmp(charset, "us-ascii") == 0 ||
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
34 strcasecmp(charset, "ascii") == 0) {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
35 cd = NULL;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
36 ascii = TRUE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
37 } else if (strcasecmp(charset, "UTF-8") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
38 strcasecmp(charset, "UTF8") == 0) {
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
39 cd = NULL;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
40 ascii = FALSE;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
41 } else {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
42 ascii = FALSE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
43 cd = iconv_open("UTF-8", charset);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
44 if (cd == (iconv_t)-1) {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
45 if (unknown_charset != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
46 *unknown_charset = TRUE;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
47 return NULL;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
48 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
49 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
50
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
51 t = i_new(CharsetTranslation, 1);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
52 t->cd = cd;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
53 t->ascii = ascii;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
54 return t;
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
55 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
56
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
57 void charset_to_utf8_end(CharsetTranslation *t)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
58 {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
59 if (t->cd != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
60 iconv_close(t->cd);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
61 i_free(t);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
62 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
63
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
64 void charset_to_utf8_reset(CharsetTranslation *t)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
65 {
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
66 if (t->cd != NULL)
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
67 (void)iconv(t->cd, NULL, NULL, NULL, NULL);
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
68 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
69
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
70 CharsetResult
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
71 charset_to_ucase_utf8(CharsetTranslation *t,
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
72 const unsigned char *src, size_t *src_size, Buffer *dest)
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
73 {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
74 ICONV_CONST char *ic_srcbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
75 char *ic_destbuf;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
76 size_t srcleft, destpos, destleft, size;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
77 CharsetResult ret;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
78
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
79 destpos = buffer_get_used_size(dest);
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
80 destleft = buffer_get_size(dest) - destpos;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
81
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
82 if (t->cd == NULL) {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
83 /* no translation needed - just copy it to outbuf uppercased */
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
84 if (*src_size > destleft)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
85 *src_size = destleft;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
86 _charset_utf8_ucase(src, *src_size, dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
87 return CHARSET_RET_OK;
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
88 }
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
89
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
90 size = destleft;
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
91 srcleft = *src_size;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
92 ic_srcbuf = (ICONV_CONST char *) src;
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
93 ic_destbuf = buffer_append_space(dest, destleft);
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
94
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
95 if (iconv(t->cd, &ic_srcbuf, &srcleft,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
96 &ic_destbuf, &destleft) != (size_t)-1)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
97 ret = CHARSET_RET_OK;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
98 else if (errno == E2BIG)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
99 ret = CHARSET_RET_OUTPUT_FULL;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
100 else if (errno == EINVAL)
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
101 ret = CHARSET_RET_INCOMPLETE_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
102 else {
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
103 /* should be EILSEQ */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
104 return CHARSET_RET_INVALID_INPUT;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
105 }
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
106 size -= destleft;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
107
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
108 /* give back the memory we didn't use */
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
109 buffer_set_used_size(dest, buffer_get_used_size(dest) - destleft);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
110
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
111 *src_size -= srcleft;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
112 _charset_utf8_ucase((unsigned char *) ic_destbuf - size, size,
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
113 dest, destpos);
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
114 return ret;
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
115 }
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
116
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
117 static const char *
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
118 charset_to_utf8_string_int(const char *charset, int *unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
119 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
120 size_t *utf8_size_r, int ucase)
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
121 {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
122 iconv_t cd;
611
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
123 ICONV_CONST char *inbuf;
9373933b1be1 Removed warnings with Solaris' iconv()
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
124 char *outbuf, *outpos;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
125 size_t inleft, outleft, outsize, pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
126
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 579
diff changeset
127 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
128 strcasecmp(charset, "ascii") == 0 ||
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
129 strcasecmp(charset, "UTF-8") == 0 ||
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
130 strcasecmp(charset, "UTF8") == 0) {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
131 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
132 *unknown_charset = FALSE;
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
133
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
134 if (!ucase) {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
135 if (utf8_size_r != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
136 *utf8_size_r = size;
898
0d5be52d7131 Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
137 return t_strndup(data, size);
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
138 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
139
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
140 return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 765
diff changeset
141 }
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
142
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
143 cd = iconv_open("UTF-8", charset);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
144 if (cd == (iconv_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
145 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
146 *unknown_charset = TRUE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
147 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
148 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
149
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
150 if (unknown_charset != NULL)
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
151 *unknown_charset = FALSE;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
152
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
153 inbuf = (ICONV_CONST char *) data;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
154 inleft = size;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
155
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
156 outsize = outleft = inleft * 2;
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
157 outbuf = outpos = t_buffer_get(outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
158
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
159 while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
160 if (errno != E2BIG) {
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
161 /* invalid data */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
162 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
163 return NULL;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
164 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
165
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
166 /* output buffer too small, grow it */
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
167 pos = outsize - outleft;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
168 outsize *= 2;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
169 outleft = outsize - pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
170
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
171 outbuf = t_buffer_reget(outbuf, outsize + 1);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
172 outpos = outbuf + pos;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
173 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
174
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
175 if (utf8_size_r != NULL)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
176 *utf8_size_r = (size_t) (outpos - outbuf);
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
177 *outpos++ = '\0';
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
178 t_buffer_alloc((size_t) (outpos - outbuf));
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
179
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
180 if (ucase)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
181 str_ucase(outbuf); /* FIXME: utf8 */
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
182
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
183 iconv_close(cd);
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
184 return outbuf;
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
185 }
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
186
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
187 const char *
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
188 charset_to_utf8_string(const char *charset, int *unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
189 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
190 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
191 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
192 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
193 data, size, utf8_size_r, FALSE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
194 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
195
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
196 const char *
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
197 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
198 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
199 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
200 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
201 return charset_to_utf8_string_int(charset, unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
202 data, size, utf8_size_r, TRUE);
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
203 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
204
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
205 #endif