annotate src/lib-mail/message-header-decode.c @ 22713:cb108f786fb4

Updated copyright notices to include the year 2018.
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Mon, 01 Jan 2018 22:42:08 +0100
parents c09853ffcc46
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22713
cb108f786fb4 Updated copyright notices to include the year 2018.
Stephan Bosch <stephan.bosch@dovecot.fi>
parents: 21822
diff changeset
1 /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
2
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
3 #include "lib.h"
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
4 #include "base64.h"
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
5 #include "buffer.h"
6131
5f56b2eb32b3 Use uni_utf8_to_decomposed_titlecase() to have proper case-insensitive UTF-8
Timo Sirainen <tss@iki.fi>
parents: 6123
diff changeset
6 #include "unichar.h"
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
7 #include "charset-utf8.h"
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
8 #include "quoted-printable.h"
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
9 #include "message-header-decode.h"
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
10
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
/* Decode the body of one RFC 2047 encoded-word.

   data/size: the input, expected to look like "charset?encoding?text?=".
     The caller in this file passes the bytes that follow the leading "=?".
   decodebuf: receives the charset name, a NUL byte and then the decoded
     text, in that order.
   charsetlen_r: set to decodebuf->used as measured right after the NUL was
     appended, i.e. the point in decodebuf where the decoded text begins.

   Returns the number of input bytes consumed (index of the third '?' plus 2,
   so the closing "?=" is included), or 0 when the block is rejected:
    - fewer than QCOUNT '?' characters were found, or the third '?' is the
      last byte, or it is not followed by '=' (nothing is appended to
      decodebuf and *charsetlen_r is left untouched in this case)
    - the byte right after the first '?' is not one of q/Q/b/B (the charset
      and NUL have already been appended to decodebuf by then)

   Only the single byte after the first '?' selects the encoding; the code
   does not check that the encoding field is exactly one byte long.
   A negative return from quoted_printable_q_decode() or base64_decode() is
   ignored on purpose (both if-bodies are empty), so whatever those calls
   appended to decodebuf is kept. */
11 static size_t
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
12 message_header_decode_encoded(const unsigned char *data, size_t size,
21322
5ab8dc1a4a6f global: Change string position/length from unsigned int to size_t
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 19552
diff changeset
13 buffer_t *decodebuf, size_t *charsetlen_r)
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
14 {
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
15 #define QCOUNT 3
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
16 unsigned int num = 0;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
17 size_t i, start_pos[QCOUNT];
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
18
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
19 /* data should contain "charset?encoding?text?=" */
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
20 for (i = 0; i < size; i++) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
21 if (data[i] == '?') {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
22 start_pos[num++] = i;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
23 if (num == QCOUNT)
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
24 break;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
25 }
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
26 }
21822
c09853ffcc46 lib-mail: Fix read overflow / crash in message_header_decode()
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 21390
diff changeset
27 if (i+1 >= size || data[i+1] != '=') {
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
28 /* invalid block */
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
29 return 0;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
30 }
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
31
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
32 buffer_append(decodebuf, data, start_pos[0]);
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
33 buffer_append_c(decodebuf, '\0');
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
34 *charsetlen_r = decodebuf->used;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
35
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
36 switch (data[start_pos[0]+1]) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
37 case 'q':
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
38 case 'Q':
15453
0be51d94b0d9 lib-mail: Detect errors in quoted-printable input.
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
39 if (quoted_printable_q_decode(data + start_pos[1] + 1,
0be51d94b0d9 lib-mail: Detect errors in quoted-printable input.
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
40 start_pos[2] - start_pos[1] - 1,
0be51d94b0d9 lib-mail: Detect errors in quoted-printable input.
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
41 decodebuf) < 0) {
0be51d94b0d9 lib-mail: Detect errors in quoted-printable input.
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
42 /* we skipped over some invalid data */
0be51d94b0d9 lib-mail: Detect errors in quoted-printable input.
Timo Sirainen <tss@iki.fi>
parents: 15053
diff changeset
43 }
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
44 break;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
45 case 'b':
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
46 case 'B':
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
47 if (base64_decode(data + start_pos[1] + 1,
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
48 start_pos[2] - start_pos[1] - 1,
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
49 NULL, decodebuf) < 0) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
50 /* contains invalid data. show what we got so far. */
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
51 }
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
52 break;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
53 default:
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
54 /* unknown encoding */
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
55 return 0;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
56 }
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
57
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
58 return start_pos[2] + 2;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
59 }
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
60
21322
5ab8dc1a4a6f global: Change string position/length from unsigned int to size_t
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 19552
diff changeset
/* Returns TRUE if each of the size bytes at data is a space, tab, CR or LF.
   An empty range (size == 0) also yields TRUE, since the loop body never
   runs. Returns FALSE as soon as any other byte is seen. */
61 static bool is_only_lwsp(const unsigned char *data, size_t size)
9934
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
62 {
21322
5ab8dc1a4a6f global: Change string position/length from unsigned int to size_t
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 19552
diff changeset
63 size_t i;
9934
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
64
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
65 for (i = 0; i < size; i++) {
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
66 if (!(data[i] == ' ' || data[i] == '\t' ||
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
67 data[i] == '\r' || data[i] == '\n'))
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
68 return FALSE;
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
69 }
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
70 return TRUE;
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
71 }
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
72
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
/* Split a header value into plain and "=?charset?Q|B?text?=" encoded pieces
   and hand each piece to callback(data, size, charset, context).

   - Plain (unencoded) text is passed with charset == NULL.
   - A successfully decoded encoded-word is passed with charset pointing to
     the NUL-terminated charset name at the start of decodebuf and data
     pointing to the decoded text that follows it. Encoded-words whose
     decoded text is empty (decodebuf->used == charsetlen) produce no
     callback call.
   - Plain text found in front of an "=?" is skipped instead of sent when it
     consists only of LWSP (see is_only_lwsp()).
   - When message_header_decode_encoded() returns 0, start_pos is moved back
     to the "=?" so that it stays part of the plain text, and scanning goes
     on from the byte after "=?".
   - If callback returns FALSE inside the loop, start_pos is set to size and
     the loop is left, so nothing more (including the trailing text) is sent.
   - Whatever remains after the loop (size != start_pos) is sent as plain
     text; the callback's return value is ignored for that last call and no
     LWSP check is applied to it.

   decodebuf is created from default_pool on the first "=?" seen, emptied
   with buffer_set_used_size() for each later one, and freed before
   returning.

   NOTE(review): because start_pos is set to pos-2 on the ret == 0 path, an
   LWSP-only run that precedes an undecodable "=?" (including leading
   whitespace at the very start of data) is never passed to callback.
   Confirm whether dropping that whitespace is intended. */
73 void message_header_decode(const unsigned char *data, size_t size,
1038
60646878858e Function typedefs now define them as functions, not function pointers.
Timo Sirainen <tss@iki.fi>
parents: 953
diff changeset
74 message_header_decode_callback_t *callback,
953
411006be3c66 Naming change for function typedefs.
Timo Sirainen <tss@iki.fi>
parents: 903
diff changeset
75 void *context)
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
76 {
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
77 buffer_t *decodebuf = NULL;
21322
5ab8dc1a4a6f global: Change string position/length from unsigned int to size_t
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 19552
diff changeset
78 size_t charsetlen = 0;
6950
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
79 size_t pos, start_pos, ret;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
80
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
81 /* =?charset?Q|B?text?= */
11039
0f98525e4567 Removed dead code.
Timo Sirainen <tss@iki.fi>
parents: 10582
diff changeset
82 start_pos = 0;
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
83 for (pos = 0; pos + 1 < size; ) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
84 if (data[pos] != '=' || data[pos+1] != '?') {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
85 pos++;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
86 continue;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
87 }
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
88
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
89 /* encoded string beginning */
9934
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
90 if (pos != start_pos &&
9444d7964766 message_header_decode*() should ignore LWSP between two encoded-words.
Timo Sirainen <tss@iki.fi>
parents: 9662
diff changeset
91 !is_only_lwsp(data+start_pos, pos-start_pos)) {
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
92 /* send the unencoded data so far */
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
93 if (!callback(data + start_pos, pos - start_pos,
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
94 NULL, context)) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
95 start_pos = size;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
96 break;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
97 }
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
98 }
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
99
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
100 if (decodebuf == NULL) {
6537
58d9f94b9919 message_header_decode(): Don't use t_push/t_pop. The callback may want to
Timo Sirainen <tss@iki.fi>
parents: 6429
diff changeset
101 decodebuf = buffer_create_dynamic(default_pool,
58d9f94b9919 message_header_decode(): Don't use t_push/t_pop. The callback may want to
Timo Sirainen <tss@iki.fi>
parents: 6429
diff changeset
102 size - pos);
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
103 } else {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
104 buffer_set_used_size(decodebuf, 0);
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
105 }
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
106
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
107 pos += 2;
6950
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
108 ret = message_header_decode_encoded(data + pos, size - pos,
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
109 decodebuf, &charsetlen);
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
110 if (ret == 0) {
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
111 start_pos = pos-2;
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
112 continue;
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
113 }
63e225ab7361 If header contains invalid MIME input, preserve the =? in output.
Timo Sirainen <tss@iki.fi>
parents: 6910
diff changeset
114 pos += ret;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
115
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
116 if (decodebuf->used > charsetlen) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
117 /* decodebuf contains <charset> NUL <text> */
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
118 if (!callback(CONST_PTR_OFFSET(decodebuf->data,
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
119 charsetlen),
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
120 decodebuf->used - charsetlen,
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
121 decodebuf->data, context)) {
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
122 start_pos = size;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
123 break;
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
124 }
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
125 }
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
126
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
127 start_pos = pos;
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
128 }
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
129
5517
2608b9e18bbb Rewrite/cleanup
Timo Sirainen <tss@iki.fi>
parents: 5503
diff changeset
130 if (size != start_pos) {
21822
c09853ffcc46 lib-mail: Fix read overflow / crash in message_header_decode()
Timo Sirainen <timo.sirainen@dovecot.fi>
parents: 21390
diff changeset
131 i_assert(size > start_pos);
5503
b7573b83f999 Minor optimization.
Timo Sirainen <tss@iki.fi>
parents: 4906
diff changeset
132 (void)callback(data + start_pos, size - start_pos,
b7573b83f999 Minor optimization.
Timo Sirainen <tss@iki.fi>
parents: 4906
diff changeset
133 NULL, context);
b7573b83f999 Minor optimization.
Timo Sirainen <tss@iki.fi>
parents: 4906
diff changeset
134 }
6538
1b7d91f7b151 Fix for last change: don't crash if temp buffer wasn't created..
Timo Sirainen <tss@iki.fi>
parents: 6537
diff changeset
135 if (decodebuf != NULL)
1b7d91f7b151 Fix for last change: don't crash if temp buffer wasn't created..
Timo Sirainen <tss@iki.fi>
parents: 6537
diff changeset
136 buffer_free(&decodebuf);
783
d826ab8c8d62 Quoted-Printable decoding was buggy. Extracted RFC2047 header decoding into
Timo Sirainen <tss@iki.fi>
parents:
diff changeset
137 }
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
138
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
/* Context handed to decode_utf8_callback() via message_header_decode().
   dest: buffer the converted output is appended to.
   normalizer: optional normalizer function; NULL means none (the callback
     checks for NULL before calling it).
   changed: not read or written by any code shown in this listing.
     NOTE(review): looks like a leftover - confirm before removing. */
139 struct decode_utf8_context {
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
140 buffer_t *dest;
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
141 normalizer_func_t *normalizer;
6121
157e31742eeb message_header_decode_utf8() returns not TRUE if it actually changed
Timo Sirainen <tss@iki.fi>
parents: 6118
diff changeset
142 unsigned int changed:1;
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
143 };
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
144
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
/* message_header_decode() callback that appends one piece of header text to
   ctx->dest (context is a struct decode_utf8_context).

   charset == NULL or charset_is_utf8(charset):
     - with a normalizer set, the normalizer is run on the data with
       ctx->dest as its output and its return value is ignored
     - otherwise uni_utf8_get_valid_data() is called with ctx->dest, and if
       it returns TRUE the original bytes are appended unchanged
       (presumably TRUE means the input was already valid and nothing was
       written to dest - confirm against unichar.h)
   Any other charset:
     - if charset_to_utf8_begin() fails (< 0), the same
       uni_utf8_get_valid_data() fallback is used; the normalizer is not
       applied on this path
     - otherwise the data is converted with charset_to_utf8(), ignoring
       its return value, and the translation is released with
       charset_to_utf8_end()

   Always returns TRUE, so this callback never asks message_header_decode()
   to stop early. */
145 static bool
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
146 decode_utf8_callback(const unsigned char *data, size_t size,
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
147 const char *charset, void *context)
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
148 {
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
149 struct decode_utf8_context *ctx = context;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
150 struct charset_translation *t;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
151
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
152 if (charset == NULL || charset_is_utf8(charset)) {
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
153 /* ASCII / UTF-8 */
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
154 if (ctx->normalizer != NULL) {
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
155 (void)ctx->normalizer(data, size, ctx->dest);
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
156 } else {
13015
c392158f374d message [header] decoder: Output only valid UTF-8 data.
Timo Sirainen <tss@iki.fi>
parents: 12782
diff changeset
157 if (uni_utf8_get_valid_data(data, size, ctx->dest))
c392158f374d message [header] decoder: Output only valid UTF-8 data.
Timo Sirainen <tss@iki.fi>
parents: 12782
diff changeset
158 buffer_append(ctx->dest, data, size);
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
159 }
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
160 return TRUE;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
161 }
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
162
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
163 if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) {
6954
aee731c09668 If we encounter an unknown charset, get the ASCII (or UTF-8 actually)
Timo Sirainen <tss@iki.fi>
parents: 6950
diff changeset
164 /* data probably still contains some valid ASCII characters.
aee731c09668 If we encounter an unknown charset, get the ASCII (or UTF-8 actually)
Timo Sirainen <tss@iki.fi>
parents: 6950
diff changeset
165 append them. */
aee731c09668 If we encounter an unknown charset, get the ASCII (or UTF-8 actually)
Timo Sirainen <tss@iki.fi>
parents: 6950
diff changeset
166 if (uni_utf8_get_valid_data(data, size, ctx->dest))
aee731c09668 If we encounter an unknown charset, get the ASCII (or UTF-8 actually)
Timo Sirainen <tss@iki.fi>
parents: 6950
diff changeset
167 buffer_append(ctx->dest, data, size);
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
168 return TRUE;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
169 }
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
170
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
171 /* ignore any errors */
6122
d86581f4a0c6 charset_to_utf8() isn't used anymore, so renamed charset_to_utf8_full() to it.
Timo Sirainen <tss@iki.fi>
parents: 6121
diff changeset
172 (void)charset_to_utf8(t, data, &size, ctx->dest);
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
173 charset_to_utf8_end(&t);
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
174 return TRUE;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
175 }
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
176
14635
45952eee7ad4 lib-mail: message_header_decode_utf8() API changed to not give return value.
Timo Sirainen <tss@iki.fi>
parents: 14133
diff changeset
/* Decode the header value in data/size with message_header_decode() and
   append the result to dest, using decode_utf8_callback() for each piece.
   normalizer may be NULL; when it is not, it is stored in the zeroed
   context and used by the callback. Nothing is returned. */
177 void message_header_decode_utf8(const unsigned char *data, size_t size,
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
178 buffer_t *dest, normalizer_func_t *normalizer)
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
179 {
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
180 struct decode_utf8_context ctx;
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
181
21389
59437f8764c6 global: Replaced all instances of memset(p, 0, sizeof(*p)) with the new i_zero() macro.
Stephan Bosch <stephan.bosch@dovecot.fi>
parents: 21322
diff changeset
182 i_zero(&ctx);
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
183 ctx.dest = dest;
15053
c976a9c01613 Replaced "decomposed titlecase" conversions with more generic normalizer function.
Timo Sirainen <tss@iki.fi>
parents: 14682
diff changeset
184 ctx.normalizer = normalizer;
6118
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
185 message_header_decode(data, size, decode_utf8_callback, &ctx);
841b052e269e Added message_header_decode_utf8(). Use it in message decoder instead of
Timo Sirainen <tss@iki.fi>
parents: 5517
diff changeset
186 }