Mercurial > dovecot > core-2.2
annotate src/lib-charset/charset-utf8.h @ 608:debb8468514e HEAD
SEARCH CHARSET now works properly with message bodies, and in general body
searching works more correctly by decoding base64/qp data. Non-text MIME
parts are currently not included in search, that could be made optional.
Also the body is parsed separately for each keyword, that could be
optimized.
Changed base64_decode() behaviour so that it can accept non-base64 data as
well, ie. line feeds etc.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 13 Nov 2002 13:08:18 +0200 |
parents | f2aa58c2afd0 |
children | 553f050c8313 |
rev | line source |
---|---|
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 #ifndef __CHARSET_UTF8_H |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 #define __CHARSET_UTF8_H |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 |
608
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
4 typedef struct _CharsetTranslation CharsetTranslation; |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
5 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
6 /* Begin translation to UTF-8. */ |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
7 CharsetTranslation *charset_to_utf8_begin(const char *charset, |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
8 int *unknown_charset); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
9 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
10 void charset_to_utf8_end(CharsetTranslation *t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
11 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
12 void charset_to_utf8_reset(CharsetTranslation *t); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
13 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
14 /* Convert inbuf to UTF-8. inbuf and inbuf_size is updated to specify beginning |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
15 of data that was not written to outbuf, either because of inbuf ended with |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
16 incomplete character sequence or because the outbuf got full. Returns TRUE |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
17 if no conversion errors were detected. */ |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
18 int charset_to_ucase_utf8(CharsetTranslation *t, |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
19 const unsigned char **inbuf, size_t *insize, |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
20 unsigned char *outbuf, size_t *outsize); |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
21 |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
22 /* Simple wrapper for above functions. size is updated to strlen() of |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
23 returned UTF-8 string. */ |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
24 const char * |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
25 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset, |
debb8468514e
SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents:
568
diff
changeset
|
26 const unsigned char *buf, size_t *size); |
568
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
27 |
f2aa58c2afd0
SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
28 #endif |