Mercurial > dovecot > original-hg > dovecot-1.2
annotate src/lib-imap/imap-base-subject.c @ 9532:00cd9aacd03c HEAD
Updated copyright notices to include year 2010.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 25 Jan 2010 01:18:58 +0200 |
parents | b9faf4db2a9f |
children |
rev | line source |
---|---|
9532
00cd9aacd03c
Updated copyright notices to include year 2010.
Timo Sirainen <tss@iki.fi>
parents:
8590
diff
changeset
|
1 /* Copyright (c) 2002-2010 Dovecot authors, see the included COPYING file */ |
793 | 2 |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
3 /* Implemented against draft-ietf-imapext-sort-10 and |
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
4 draft-ietf-imapext-thread-12 */ |
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
5 |
793 | 6 #include "lib.h" |
7 #include "buffer.h" | |
8 #include "charset-utf8.h" | |
9 #include "message-header-decode.h" | |
10 #include "imap-base-subject.h" | |
11 | |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
12 static void pack_whitespace(buffer_t *buf) |
793 | 13 { |
14 char *data, *dest; | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
15 bool last_lwsp; |
793 | 16 |
4451
1a35d53c18fc
Array API redesigned to work using unions. It now provides type safety
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
17 data = buffer_get_modifiable_data(buf, NULL); |
793 | 18 |
19 /* check if we need to do anything */ | |
20 while (*data != '\0') { | |
985 | 21 if (*data == '\t' || *data == '\n' || *data == '\r' || |
793 | 22 (*data == ' ' && (data[1] == ' ' || data[1] == '\t'))) |
23 break; | |
24 data++; | |
25 } | |
26 | |
27 if (*data == '\0') | |
28 return; | |
29 | |
30 /* @UNSAFE: convert/pack the whitespace */ | |
31 dest = data; last_lwsp = FALSE; | |
32 while (*data != '\0') { | |
985 | 33 if (*data == '\t' || *data == ' ' || |
34 *data == '\r' || *data == '\n') { | |
793 | 35 if (!last_lwsp) { |
36 *dest++ = ' '; | |
37 last_lwsp = TRUE; | |
38 } | |
39 } else { | |
40 *dest++ = *data; | |
41 last_lwsp = FALSE; | |
42 } | |
43 data++; | |
44 } | |
45 *dest = '\0'; | |
46 | |
4451
1a35d53c18fc
Array API redesigned to work using unions. It now provides type safety
Timo Sirainen <tss@iki.fi>
parents:
3879
diff
changeset
|
47 data = buffer_get_modifiable_data(buf, NULL); |
793 | 48 buffer_set_used_size(buf, (size_t) (dest - data)+1); |
49 } | |
50 | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
51 static void remove_subj_trailers(buffer_t *buf, size_t start_pos, |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
52 bool *is_reply_or_forward_r) |
793 | 53 { |
54 const char *data; | |
55 size_t orig_size, size; | |
56 | |
57 /* subj-trailer = "(fwd)" / WSP */ | |
58 data = buffer_get_data(buf, &orig_size); | |
59 | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
60 if (orig_size < 1) /* size includes trailing \0 */ |
793 | 61 return; |
62 | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
63 for (size = orig_size-1; size > start_pos; ) { |
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
64 if (data[size-1] == ' ') |
793 | 65 size--; |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
66 else if (size >= 5 && |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
67 memcmp(data + size - 5, "(FWD)", 5) == 0) { |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
68 if (is_reply_or_forward_r != NULL) |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
69 *is_reply_or_forward_r = TRUE; |
793 | 70 size -= 5; |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
71 } else { |
793 | 72 break; |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
73 } |
793 | 74 } |
75 | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
76 if (size != orig_size-1) { |
793 | 77 buffer_set_used_size(buf, size); |
78 buffer_append_c(buf, '\0'); | |
79 } | |
80 } | |
81 | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
82 static bool remove_blob(const char **datap) |
793 | 83 { |
84 const char *data = *datap; | |
85 | |
86 if (*data != '[') | |
87 return FALSE; | |
88 | |
959
f66455b629cd
Blobs weren't removed correctly, resulting in incorrect SORT SUBJECT and
Timo Sirainen <tss@iki.fi>
parents:
924
diff
changeset
|
89 data++; |
793 | 90 while (*data != '\0' && *data != '[' && *data != ']') |
91 data++; | |
92 | |
93 if (*data != ']') | |
94 return FALSE; | |
95 | |
96 data++; | |
97 if (*data == ' ') | |
98 data++; | |
99 | |
100 *datap = data; | |
101 return TRUE; | |
102 } | |
103 | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
104 static bool remove_subj_leader(buffer_t *buf, size_t *start_pos, |
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
105 bool *is_reply_or_forward_r) |
793 | 106 { |
107 const char *data, *orig_data; | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
108 bool ret = FALSE; |
793 | 109 |
110 /* subj-leader = (*subj-blob subj-refwd) / WSP | |
111 | |
112 subj-blob = "[" *BLOBCHAR "]" *WSP | |
113 subj-refwd = ("re" / ("fw" ["d"])) *WSP [subj-blob] ":" | |
114 | |
115 BLOBCHAR = %x01-5a / %x5c / %x5e-7f | |
116 ; any CHAR except '[' and ']' */ | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
117 orig_data = buffer_get_data(buf, NULL); |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
118 orig_data += *start_pos; |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
119 data = orig_data; |
793 | 120 |
121 if (*data == ' ') { | |
122 /* independent from checks below - always removed */ | |
2526 | 123 data++; orig_data++; |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
124 *start_pos += 1; |
793 | 125 ret = TRUE; |
126 } | |
127 | |
128 while (*data == '[') { | |
129 if (!remove_blob(&data)) | |
130 return ret; | |
131 } | |
132 | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
133 if (strncmp(data, "RE", 2) == 0) |
793 | 134 data += 2; |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
135 else if (strncmp(data, "FWD", 3) == 0) |
793 | 136 data += 3; |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
137 else if (strncmp(data, "FW", 2) == 0) |
793 | 138 data += 2; |
139 else | |
140 return ret; | |
141 | |
142 if (*data == ' ') | |
143 data++; | |
144 | |
145 if (*data == '[' && !remove_blob(&data)) | |
146 return ret; | |
147 | |
148 if (*data != ':') | |
149 return ret; | |
150 | |
151 data++; | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
152 *start_pos += (size_t)(data - orig_data); |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
153 if (is_reply_or_forward_r != NULL) |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
154 *is_reply_or_forward_r = TRUE; |
793 | 155 return TRUE; |
156 } | |
157 | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
158 static bool remove_blob_when_nonempty(buffer_t *buf, size_t *start_pos) |
793 | 159 { |
160 const char *data, *orig_data; | |
161 | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
162 orig_data = buffer_get_data(buf, NULL); |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
163 orig_data += *start_pos; |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
164 data = orig_data; |
793 | 165 if (*data == '[' && remove_blob(&data) && *data != '\0') { |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
166 *start_pos += (size_t)(data - orig_data); |
793 | 167 return TRUE; |
168 } | |
169 | |
170 return FALSE; | |
171 } | |
172 | |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
173 static bool remove_subj_fwd_hdr(buffer_t *buf, size_t *start_pos, |
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
174 bool *is_reply_or_forward_r) |
793 | 175 { |
176 const char *data; | |
177 size_t size; | |
178 | |
179 /* subj-fwd = subj-fwd-hdr subject subj-fwd-trl | |
180 subj-fwd-hdr = "[fwd:" | |
181 subj-fwd-trl = "]" */ | |
182 data = buffer_get_data(buf, &size); | |
183 | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
184 if (strncmp(data + *start_pos, "[FWD:", 5) != 0) |
793 | 185 return FALSE; |
186 | |
187 if (data[size-2] != ']') | |
188 return FALSE; | |
189 | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
190 if (is_reply_or_forward_r != NULL) |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
191 *is_reply_or_forward_r = TRUE; |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
192 |
793 | 193 buffer_set_used_size(buf, size-2); |
194 buffer_append_c(buf, '\0'); | |
195 | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
196 *start_pos += 5; |
793 | 197 return TRUE; |
198 } | |
199 | |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
200 const char *imap_get_base_subject_cased(pool_t pool, const char *subject, |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
201 bool *is_reply_or_forward_r) |
793 | 202 { |
903
fd8888f6f037
Naming style changes, finally got tired of most of the typedefs. Also the
Timo Sirainen <tss@iki.fi>
parents:
898
diff
changeset
|
203 buffer_t *buf; |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
204 size_t start_pos, subject_len; |
3863
55df57c028d4
Added "bool" type and changed all ints that were used as booleans to bool.
Timo Sirainen <tss@iki.fi>
parents:
2708
diff
changeset
|
205 bool found; |
793 | 206 |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
207 if (is_reply_or_forward_r != NULL) |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
208 *is_reply_or_forward_r = FALSE; |
924
4f697dde0fca
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
Timo Sirainen <tss@iki.fi>
parents:
903
diff
changeset
|
209 |
793 | 210 subject_len = strlen(subject); |
2708
f1e9f3ec8135
Buffer API change: we no longer support limited sized buffers where
Timo Sirainen <tss@iki.fi>
parents:
2526
diff
changeset
|
211 buf = buffer_create_dynamic(pool, subject_len); |
793 | 212 |
213 /* (1) Convert any RFC 2047 encoded-words in the subject to | |
214 UTF-8. Convert all tabs and continuations to space. | |
215 Convert all multiple spaces to a single space. */ | |
6119
9607369b6bce
Use message_header_decode_utf8() instead of implementing our own.
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
216 message_header_decode_utf8((const unsigned char *)subject, subject_len, |
9607369b6bce
Use message_header_decode_utf8() instead of implementing our own.
Timo Sirainen <tss@iki.fi>
parents:
6112
diff
changeset
|
217 buf, TRUE); |
793 | 218 buffer_append_c(buf, '\0'); |
219 | |
220 pack_whitespace(buf); | |
221 | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
222 start_pos = 0; |
793 | 223 do { |
224 /* (2) Remove all trailing text of the subject that matches | |
225 the subj-trailer ABNF, repeat until no more matches are | |
226 possible. */ | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
227 remove_subj_trailers(buf, start_pos, is_reply_or_forward_r); |
793 | 228 |
229 do { | |
230 /* (3) Remove all prefix text of the subject that | |
231 matches the subj-leader ABNF. */ | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
232 found = remove_subj_leader(buf, &start_pos, |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
233 is_reply_or_forward_r); |
793 | 234 |
235 /* (4) If there is prefix text of the subject that | |
236 matches the subj-blob ABNF, and removing that prefix | |
237 leaves a non-empty subj-base, then remove the prefix | |
238 text. */ | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
239 found = remove_blob_when_nonempty(buf, &start_pos) || |
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
240 found; |
793 | 241 |
242 /* (5) Repeat (3) and (4) until no matches remain. */ | |
243 } while (found); | |
244 | |
245 /* (6) If the resulting text begins with the subj-fwd-hdr ABNF | |
246 and ends with the subj-fwd-trl ABNF, remove the | |
247 subj-fwd-hdr and subj-fwd-trl and repeat from step (2). */ | |
2411
c8fa857c4e08
Drop using buffer_set_start_pos(). Also some coding style cleanups.
Timo Sirainen <tss@iki.fi>
parents:
1797
diff
changeset
|
248 } while (remove_subj_fwd_hdr(buf, &start_pos, is_reply_or_forward_r)); |
793 | 249 |
250 /* (7) The resulting text is the "base subject" used in the | |
251 SORT. */ | |
7856
3c8736e8f08d
imap_get_base_subject_cased(): Subject trailers weren't removed correctly.
Timo Sirainen <tss@iki.fi>
parents:
7086
diff
changeset
|
252 return (const char *)buf->data + start_pos; |
793 | 253 } |