annotate src/lib-charset/charset-utf8.h @ 18144:7459c0891a85

lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT. Instead, the incomplete input was just being modified into broken output.
author Timo Sirainen <tss@iki.fi>
date Sat, 10 Jan 2015 04:25:21 +0200
parents c976a9c01613
children 0e74934072e0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6410
e4eb71ae8e96 Changed .h ifdef/defines to use <NAME>_H format.
Timo Sirainen <tss@iki.fi>
parents: 6132
diff changeset
1 #ifndef CHARSET_UTF8_H
e4eb71ae8e96 Changed .h ifdef/defines to use <NAME>_H format.
Timo Sirainen <tss@iki.fi>
parents: 6132
diff changeset
2 #define CHARSET_UTF8_H
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
4 #include "unichar.h"
6132
d01522d276f6 charset_to_utf8_begin() API change.
Timo Sirainen <tss@iki.fi>
parents: 6131
diff changeset
5
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
6 struct charset_translation;
6132
d01522d276f6 charset_to_utf8_begin() API change.
Timo Sirainen <tss@iki.fi>
parents: 6131
diff changeset
7
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
8 enum charset_result {
765
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
9 CHARSET_RET_OK = 1,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
10 CHARSET_RET_INCOMPLETE_INPUT = -1,
553f050c8313 Added buffer API. Point is to hide all buffer writing behind this API which
Timo Sirainen <tss@iki.fi>
parents: 608
diff changeset
11 CHARSET_RET_INVALID_INPUT = -2
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
12 };
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
13
6132
d01522d276f6 charset_to_utf8_begin() API change.
Timo Sirainen <tss@iki.fi>
parents: 6131
diff changeset
14 /* Begin translation to UTF-8. Returns -1 if charset is unknown. */
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
15 int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer,
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
16 struct charset_translation **t_r)
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
17 ATTR_NULL(2);
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
18 void charset_to_utf8_end(struct charset_translation **t);
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
19 void charset_to_utf8_reset(struct charset_translation *t);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
20
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
21 /* Returns TRUE if charset is UTF-8 or ASCII. */
7912
81806d402514 Added more consts, ATTR_CONSTs and ATTR_PUREs.
Timo Sirainen <tss@iki.fi>
parents: 6908
diff changeset
22 bool charset_is_utf8(const char *charset) ATTR_PURE;
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
23
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
24 /* Translate src to UTF-8. src_size is updated to contain the number of
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6114
diff changeset
25 characters actually translated from src. */
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
26 enum charset_result
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
27 charset_to_utf8(struct charset_translation *t,
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
28 const unsigned char *src, size_t *src_size, buffer_t *dest);
608
debb8468514e SEARCH CHARSET now works properly with message bodies, and in general body
Timo Sirainen <tss@iki.fi>
parents: 568
diff changeset
29
6908
a340d3379b90 Added charset_to_utf8_str()
Timo Sirainen <tss@iki.fi>
parents: 6410
diff changeset
30 /* Translate a single string to UTF-8. */
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
31 int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer,
6908
a340d3379b90 Added charset_to_utf8_str()
Timo Sirainen <tss@iki.fi>
parents: 6410
diff changeset
32 const char *input, string_t *output,
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 7912
diff changeset
33 enum charset_result *result_r) ATTR_NULL(2);
6908
a340d3379b90 Added charset_to_utf8_str()
Timo Sirainen <tss@iki.fi>
parents: 6410
diff changeset
34
18144
7459c0891a85 lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
35 /* INTERNAL: */
7459c0891a85 lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
36 enum charset_result
7459c0891a85 lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
37 charset_utf8_to_utf8(normalizer_func_t *normalizer,
7459c0891a85 lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
38 const unsigned char *src, size_t *src_size, buffer_t *dest);
7459c0891a85 lib-charset: UTF-8 -> UTF-8 translation was never returning CHARSET_RET_INCOMPLETE_INPUT
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
39
568
f2aa58c2afd0 SEARCH CHARSET support. Currently we do it through iconv() and only ASCII
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
40 #endif