annotate src/lib-charset/charset-utf8.c @ 961:41b6754c2e35 HEAD

Didn't compile without iconv.
author Timo Sirainen <tss@iki.fi>
date Mon, 13 Jan 2003 22:56:44 +0200
parents fd8888f6f037
children 952bf533c2ea
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
1 /* Copyright (C) 2002 Timo Sirainen */
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
4 #include "buffer.h"
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "charset-utf8.h"
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
6
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
7 #include <ctype.h>
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
8
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
9 void _charset_utf8_ucase(const unsigned char *src, size_t src_size,
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
10 buffer_t *dest, size_t destpos)
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
11 {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
12 char *destbuf;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
13 size_t i;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
14
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
15 destbuf = buffer_get_space(dest, destpos, src_size);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
16 for (i = 0; i < src_size; i++)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
17 destbuf[i] = i_toupper(src[i]); /* FIXME: utf8 */
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
18 }
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
19
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
20 const char *_charset_utf8_ucase_strdup(const unsigned char *data, size_t size,
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
21 size_t *utf8_size_r)
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
22 {
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
23 buffer_t *dest;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
24
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
25 dest = buffer_create_dynamic(data_stack_pool, size, (size_t)-1);
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
26 _charset_utf8_ucase(data, size, dest, 0);
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
27 if (utf8_size_r != NULL)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
28 *utf8_size_r = buffer_get_used_size(dest);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
29 buffer_append_c(dest, '\0');
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
30 return buffer_free_without_data(dest);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
31 }
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
32
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
33
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
34 #ifndef HAVE_ICONV_H
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
35
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
36 #include <ctype.h>
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
37
961
41b6754c2e35 Didn't compile without iconv.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
/* Translation state for builds without iconv (this section is compiled only
   when HAVE_ICONV_H is not defined). No conversion is performed here, so the
   struct holds nothing but a placeholder member. */
struct charset_translation {
	int dummy;
};

/* The two handles charset_to_utf8_begin() can return: one for ASCII input,
   one for input that is already UTF-8. */
static struct charset_translation ascii_translation;
static struct charset_translation utf8_translation;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
43
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
44 struct charset_translation *charset_to_utf8_begin(const char *charset,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
45 int *unknown_charset)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
46 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
47 if (unknown_charset != NULL)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
48 *unknown_charset = FALSE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
49
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
50 if (strcasecmp(charset, "us-ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51 strcasecmp(charset, "ascii") == 0)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
52 return &ascii_translation;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
53
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
54 if (strcasecmp(charset, "UTF-8") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
55 strcasecmp(charset, "UTF8") == 0)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
56 return &utf8_translation;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
57
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
58 /* no support for charsets that need translation */
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 if (unknown_charset != NULL)
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
60 *unknown_charset = TRUE;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
61 return NULL;
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
62 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
63
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
64 void charset_to_utf8_end(struct charset_translation *t __attr_unused__)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
65 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
66 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
67
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
68 void charset_to_utf8_reset(struct charset_translation *t __attr_unused__)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
69 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
70 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
71
903
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
72 enum charset_result
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
73 charset_to_ucase_utf8(struct charset_translation *t __attr_unused__,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
74 const unsigned char *src, size_t *src_size,
fd8888f6f037 Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents: 898
diff changeset
75 buffer_t *dest)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
76 {
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
77 size_t destpos, destleft;
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
78
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
79 destpos = buffer_get_used_size(dest);
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
80 destleft = buffer_get_size(dest) - destpos;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
81
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
82 /* no translation needed - just copy it to outbuf uppercased */
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
83 if (*src_size > destleft)
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
84 *src_size = destleft;
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
85 _charset_utf8_ucase(src, *src_size, dest, destpos);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
86 return CHARSET_RET_OK;
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
87 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
88
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
89 const char *
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
90 charset_to_utf8_string(const char *charset, int *unknown_charset,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
91 const unsigned char *data, size_t size,
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
92 size_t *utf8_size_r)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
93 {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
94 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
95 strcasecmp(charset, "ascii") == 0 ||
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
96 strcasecmp(charset, "UTF-8") == 0 ||
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
97 strcasecmp(charset, "UTF8") == 0) {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
98 if (unknown_charset != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
99 *unknown_charset = FALSE;
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
100 if (utf8_size_r != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
101 *utf8_size_r = size;
898
0d5be52d7131 Use unsigned char* when accessing non-NUL terminating strings. Compiler
Timo Sirainen <tss@iki.fi>
parents: 792
diff changeset
102 return t_strndup(data, size);
792
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
103 } else {
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
104 if (unknown_charset != NULL)
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
105 *unknown_charset = TRUE;
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
106 return NULL;
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
107 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
108 }
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
109
d573c53946ac Full not-too-well-tested support for SORT extension. Required a few
Timo Sirainen <tss@iki.fi>
parents: 785
diff changeset
110 const char *
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
111 charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
112 const unsigned char *data, size_t size,
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
113 size_t *utf8_size_r)
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
114 {
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
115 if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
116 strcasecmp(charset, "ascii") == 0 ||
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
117 strcasecmp(charset, "UTF-8") == 0 ||
753
3521edb6c240 charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
118 strcasecmp(charset, "UTF8") == 0) {
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
119 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
120 *unknown_charset = FALSE;
785
d96cbba73a8b Don't use Buffers with read-only data, just makes it more difficult without
Timo Sirainen <tss@iki.fi>
parents: 766
diff changeset
121 return _charset_utf8_ucase_strdup(data, size, utf8_size_r);
766
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
122 } else {
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
123 if (unknown_charset != NULL)
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
124 *unknown_charset = TRUE;
03832c7f389b Compiles again without iconv()
Timo Sirainen <tss@iki.fi>
parents: 753
diff changeset
125 return NULL;
753
3521edb6c240 charset_to_ucase_utf8_string() didn't return \0 terminated string with
Timo Sirainen <tss@iki.fi>
parents: 609
diff changeset
126 }
609
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
127 }
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
128
5470c0cb13a7 We can support UTF-8 charset too without any translations.
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
129 #endif