annotate src/lib-charset/charset-utf8.c @ 6131:5f56b2eb32b3 HEAD

Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8 comparing.
author Timo Sirainen <tss@iki.fi>
date Fri, 20 Jul 2007 17:27:02 +0300
parents 0d3583b02a32
children d01522d276f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "charset-utf8.h"
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
7 #include <ctype.h>
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
8
4605
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
/* Returns true if the charset name equals, ignoring case, one of
   "us-ascii", "ascii", "UTF-8" or "UTF8". The ASCII names are accepted
   here as well as the UTF-8 ones. charset is handed straight to
   strcasecmp(), so it must not be NULL. */
9 bool charset_is_utf8(const char *charset)
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
10 {
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
11 return strcasecmp(charset, "us-ascii") == 0 ||
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
12 strcasecmp(charset, "ascii") == 0 ||
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
13 strcasecmp(charset, "UTF-8") == 0 ||
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
14 strcasecmp(charset, "UTF8") == 0;
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
15 }
e6cb9f75b76a Added charset_is_utf8() and charset_to_ucase_utf8_full().
Timo Sirainen <tss@iki.fi>
parents: 3879
diff changeset
16
1300
952bf533c2ea Better iconv() checking.
Timo Sirainen <tss@iki.fi>
parents: 961
diff changeset
17 #ifndef HAVE_ICONV
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
18
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
19 #include <ctype.h>
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
20
961
41b6754c2e35 Didn't compile without iconv.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
/* Translation handle used when HAVE_ICONV is not defined (this definition
   sits inside the #ifndef HAVE_ICONV section). The single member is never
   read or written in the code visible here; the functions in this section
   only compare the addresses of the static instances. */
21 struct charset_translation {
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
22 int dummy;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
23 };
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
24
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* Static handles returned by charset_to_utf8_begin(): ascii_translation
   and utf8_translation when ucase is false, the _uc variants when ucase
   is true. charset_to_utf8() tells them apart purely by address. */
25 static struct charset_translation ascii_translation, utf8_translation;
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
26 static struct charset_translation ascii_translation_uc, utf8_translation_uc;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
27
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
/* Returns the translation handle for the given charset name, compared
   case-insensitively. In this non-iconv section only "us-ascii", "ascii",
   "UTF-8" and "UTF8" are recognized.

   charset            charset name; passed to strcasecmp(), must not be NULL
   ucase              if true, the _uc handle is returned, which makes
                      charset_to_utf8() run the input through
                      uni_utf8_to_decomposed_titlecase() instead of copying
                      it unchanged
   unknown_charset_r  may be NULL; otherwise set to FALSE when the charset
                      is recognized and to TRUE when it is not

   Returns a pointer to one of the file-static handles, or NULL when the
   charset is not recognized. */
28 struct charset_translation *
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
29 charset_to_utf8_begin(const char *charset, bool ucase, bool *unknown_charset_r)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
30 {
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
/* Start from "known"; only the fall-through path at the end flips it. */
31 if (unknown_charset_r != NULL)
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
32 *unknown_charset_r = FALSE;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
33
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
34 if (strcasecmp(charset, "us-ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
35 strcasecmp(charset, "ascii") == 0)
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
36 return ucase ? &ascii_translation_uc : &ascii_translation;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
37
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
38 if (strcasecmp(charset, "UTF-8") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
39 strcasecmp(charset, "UTF8") == 0)
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
40 return ucase ? &utf8_translation_uc : &utf8_translation;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
41
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
42 /* no support for charsets that need translation */
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
43 if (unknown_charset_r != NULL)
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
44 *unknown_charset_r = TRUE;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
45 return NULL;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
46 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
47
3879
928229f8b3e6 deinit, unref, destroy, close, free, etc. functions now take a pointer to
Timo Sirainen <tss@iki.fi>
parents: 3863
diff changeset
/* No-op in this non-iconv section: the body is empty and the parameter is
   marked unused. The handles returned by charset_to_utf8_begin() are
   file-static objects, so nothing is released here, and the pointer
   referenced by t is left unchanged. */
48 void charset_to_utf8_end(struct charset_translation **t __attr_unused__)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
49 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
50 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* No-op in this non-iconv section: the body is empty and t is marked
   unused. */
52 void charset_to_utf8_reset(struct charset_translation *t __attr_unused__)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
53 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
54 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
55
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
/* Appends the input to dest, optionally case-normalized.

   t         handle from charset_to_utf8_begin(); only its address is
             inspected
   src       input bytes
   src_size  pointer to the input length; it is only read in this
             implementation, never modified
   dest      buffer the output is appended to

   If t is anything other than one of the two _uc handles, the *src_size
   bytes at src are appended unchanged and CHARSET_RET_OK is returned.
   For the _uc handles the input is passed to
   uni_utf8_to_decomposed_titlecase(); a negative return from that call
   yields CHARSET_RET_INVALID_INPUT, otherwise CHARSET_RET_OK. */
56 enum charset_result
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
57 charset_to_utf8(struct charset_translation *t,
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
58 const unsigned char *src, size_t *src_size, buffer_t *dest)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 {
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
/* Not a case-normalizing handle: plain byte copy. */
60 if (t != &utf8_translation_uc && t != &ascii_translation_uc) {
6112
e5451501ff2f charset_to_utf8_begin() now takes bool ucase parameter. Changed
Timo Sirainen <tss@iki.fi>
parents: 4605
diff changeset
61 buffer_append(dest, src, *src_size);
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
62 return CHARSET_RET_OK;
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
63 }
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
/* The same routine is used for both the ASCII and the UTF-8 _uc handle. */
64 if (uni_utf8_to_decomposed_titlecase(src, *src_size, dest) < 0)
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6126
diff changeset
65 return CHARSET_RET_INVALID_INPUT;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
66 return CHARSET_RET_OK;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
67 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
68
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
69 #endif