Mercurial > dovecot > core-2.2
annotate src/lib-charset/charset-utf8.c @ 6131:5f56b2eb32b3 HEAD
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
comparing.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 17:27:02 +0300 |
parents | 0d3583b02a32 |
children | d01522d276f6 |
rev | line source |
---|---|
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
1 /* Copyright (C) 2002 Timo Sirainen */ |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
2 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
766 | 4 #include "buffer.h" |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
5 #include "charset-utf8.h" |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
6 |
766 | 7 #include <ctype.h> |
8 | |
4605
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
/* Returns non-zero if the given charset name is one of the names this file
   treats as ASCII or UTF-8 ("us-ascii", "ascii", "UTF-8", "UTF8").
   The comparison is case-insensitive. charset must not be NULL. */
bool charset_is_utf8(const char *charset)
{
	static const char *const utf8_names[] = {
		"us-ascii",
		"ascii",
		"UTF-8",
		"UTF8"
	};
	const unsigned int names_count =
		sizeof(utf8_names) / sizeof(utf8_names[0]);
	unsigned int i;

	/* stop at the first matching name; i == names_count means no match */
	for (i = 0; i < names_count; i++) {
		if (strcasecmp(charset, utf8_names[i]) == 0)
			break;
	}
	return i < names_count;
}
e6cb9f75b76a
Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
16 |
1300 | 17 #ifndef HAVE_ICONV |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
18 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
19 #include <ctype.h> |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
20 |
/* Translation handle used when iconv is not available. It carries no state:
   only the address of the object matters, identifying which of the static
   instances below was handed out. */
struct charset_translation {
	int dummy;
};

/* Handles for input that is copied through unchanged. */
static struct charset_translation ascii_translation;
static struct charset_translation utf8_translation;
/* Handles returned when the caller asked for ucase conversion. */
static struct charset_translation ascii_translation_uc;
static struct charset_translation utf8_translation_uc;
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
27 |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
28 struct charset_translation * |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
29 charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
30 { |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
31 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
32 *unknown_charset_r = FALSE; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
33 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
34 if (strcasecmp(charset, "us-ascii") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
35 strcasecmp(charset, "ascii") == 0) |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
36 return ucase ? &ascii_translation_uc : &ascii_translation; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
37 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
38 if (strcasecmp(charset, "UTF-8") == 0 || |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
39 strcasecmp(charset, "UTF8") == 0) |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
40 return ucase ? &utf8_translation_uc : &utf8_translation; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
41 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
42 /* no support for charsets that need translation */ |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
43 if (unknown_charset_r != NULL) |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
44 *unknown_charset_r = TRUE; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
45 return NULL; |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
46 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
47 |
3879
928229f8b3e6
deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents:
3863
diff
changeset
|
48 void charset_to_utf8_end(struct charset_translation **t __attr_unused__) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
49 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
50 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
51 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
52 void charset_to_utf8_reset(struct charset_translation *t __attr_unused__) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
53 { |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
54 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
55 |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
56 enum charset_result |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
57 charset_to_utf8(struct charset_translation *t, |
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
58 const unsigned char *src, size_t *src_size, buffer_t *dest) |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
59 { |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
60 if (t != &utf8_translation_uc && t != &ascii_translation_uc) { |
6112
e5451501ff2f
charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents:
4605
diff
changeset
|
61 buffer_append(dest, src, *src_size); |
6131
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
62 return CHARSET_RET_OK; |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
63 } |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
64 if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0) |
5f56b2eb32b3
Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents:
6126
diff
changeset
|
65 return CHARSET_RET_INVALID_INPUT; |
766 | 66 return CHARSET_RET_OK; |
609
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
67 } |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
68 |
5470c0cb13a7
We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff
changeset
|
69 #endif |