Mercurial > dovecot > original-hg > dovecot-1.2
comparison src/lib-index/mbox/mbox-append.c @ 22:a946ce1f09b7 HEAD
mbox fixes, not fully working yet but almost :)
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 24 Aug 2002 05:04:45 +0300 |
parents | 82b7de533f98 |
children | 55e09f36d23d |
comparison (legend):
- equal
- deleted
- inserted
- replaced
21:163675942b83 | 22:a946ce1f09b7 |
---|---|
1 /* Copyright (C) 2002 Timo Sirainen */ | 1 /* Copyright (C) 2002 Timo Sirainen */ |
2 | 2 |
3 #include "lib.h" | 3 #include "lib.h" |
4 #include "mmap-util.h" | |
5 #include "ioloop.h" | 4 #include "ioloop.h" |
5 #include "iobuffer.h" | |
6 #include "hex-binary.h" | |
7 #include "md5.h" | |
6 #include "mbox-index.h" | 8 #include "mbox-index.h" |
7 #include "mail-index-util.h" | 9 #include "mail-index-util.h" |
8 | |
9 #include <time.h> | |
10 #include <ctype.h> | |
11 #include <unistd.h> | |
12 #include <sys/mman.h> | |
13 | |
14 static const char *months[] = { | |
15 "Jan", "Feb", "Mar", "Apr", "May", "Jun", | |
16 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" | |
17 }; | |
18 | 10 |
19 static MailIndexRecord * | 11 static MailIndexRecord * |
20 mail_index_record_append(MailIndex *index, time_t internal_date, | 12 mail_index_record_append(MailIndex *index, time_t internal_date, |
21 size_t full_virtual_size) | 13 size_t full_virtual_size) |
22 { | 14 { |
31 return NULL; | 23 return NULL; |
32 | 24 |
33 return rec; | 25 return rec; |
34 } | 26 } |
35 | 27 |
36 static time_t from_line_parse_date(const char *msg, size_t size) | 28 static void mbox_read_message(IOBuffer *inbuf, unsigned int *virtual_size) |
37 { | 29 { |
38 const char *msg_end; | 30 unsigned char *msg; |
39 struct tm tm; | 31 unsigned int i, size, startpos, vsize; |
40 int i; | 32 |
41 | 33 /* read until "[\r]\nFrom " is found */ |
42 /* From <sender> <date> <moreinfo> */ | 34 startpos = 0; vsize = 0; |
43 if (strncmp(msg, "From ", 5) != 0) | 35 while (io_buffer_read_data(inbuf, &msg, &size, startpos) >= 0) { |
44 return 0; | 36 for (i = startpos; i < size; i++) { |
45 | 37 if (msg[i] == '\n') { |
46 msg_end = msg + size; | 38 if (i == 0 || msg[i-1] != '\r') { |
47 | 39 /* missing CR */ |
48 /* skip sender */ | 40 vsize++; |
49 msg += 5; | 41 } |
50 while (*msg != ' ' && msg < msg_end) msg++; | 42 } else if (msg[i] == ' ' && i >= 5) { |
51 while (*msg == ' ' && msg < msg_end) msg++; | 43 /* See if it's space after "From" */ |
52 | 44 if (msg[i-5] == '\n' && msg[i-4] == 'F' && |
53 /* next 24 chars are the date in asctime() format, | 45 msg[i-3] == 'r' && msg[i-2] == 'o' && |
54 eg. "Thu Nov 29 22:33:52 2001" */ | 46 msg[i-1] == 'm') { |
55 if (msg+24 > msg_end) | 47 /* yes, see if we had \r too */ |
56 return 0; | 48 i -= 5; |
57 | 49 if (i > 0 && msg[i-1] == '\r') |
58 memset(&tm, 0, sizeof(tm)); | 50 i--; |
59 | 51 else |
60 /* skip weekday */ | 52 vsize--; |
61 msg += 4; | 53 break; |
62 | 54 } |
63 /* month */ | 55 } |
64 for (i = 0; i < 12; i++) { | 56 } |
65 if (strncasecmp(months[i], msg, 3) == 0) { | 57 |
66 tm.tm_mon = i; | 58 if (i < size) { |
59 startpos = i; | |
67 break; | 60 break; |
68 } | 61 } |
69 } | 62 |
70 | 63 if (i > 0) { |
71 if (i == 12 || msg[3] != ' ') | 64 startpos = i < 7 ? i : 7; |
72 return 0; | 65 i -= startpos; |
73 msg += 4; | 66 |
74 | 67 io_buffer_skip(inbuf, i); |
75 /* day */ | 68 vsize += i; |
76 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ') | 69 } |
77 return 0; | 70 } |
78 tm.tm_mday = (msg[0]-'0') * 10 + (msg[1]-'0'); | 71 |
79 msg += 3; | 72 io_buffer_skip(inbuf, startpos); |
80 | 73 vsize += startpos; |
81 /* hour */ | 74 |
82 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':') | 75 *virtual_size = vsize; |
83 return 0; | 76 } |
84 tm.tm_hour = (msg[0]-'0') * 10 + (msg[1]-'0'); | 77 |
85 msg += 3; | 78 static int mbox_index_append_next(MailIndex *index, IOBuffer *inbuf) |
86 | |
87 /* minute */ | |
88 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':') | |
89 return 0; | |
90 tm.tm_min = (msg[0]-'0') * 10 + (msg[1]-'0'); | |
91 msg += 3; | |
92 | |
93 /* second */ | |
94 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ') | |
95 return 0; | |
96 tm.tm_sec = (msg[0]-'0') * 10 + (msg[1]-'0'); | |
97 msg += 3; | |
98 | |
99 /* year */ | |
100 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || | |
101 !i_isdigit(msg[2]) || !i_isdigit(msg[3])) | |
102 return 0; | |
103 tm.tm_year = (msg[0]-'0') * 1000 + (msg[1]-'0') * 100 + | |
104 (msg[2]-'0') * 10 + (msg[3]-'0') - 1900; | |
105 | |
106 tm.tm_isdst = -1; | |
107 return mktime(&tm); | |
108 } | |
109 | |
110 static void header_func(MessagePart *part __attr_unused__, | |
111 const char *name, unsigned int name_len, | |
112 const char *value, unsigned int value_len, | |
113 void *context) | |
114 { | |
115 MailIndexRecord *rec = context; | |
116 | |
117 rec->msg_flags |= mbox_header_get_flags(name, name_len, | |
118 value, value_len); | |
119 } | |
120 | |
121 static int mbox_index_append_data(MailIndex *index, const char *msg, | |
122 off_t offset, size_t physical_size, | |
123 size_t virtual_size) | |
124 { | 79 { |
125 MailIndexRecord *rec; | 80 MailIndexRecord *rec; |
126 MailIndexUpdate *update; | 81 MailIndexUpdate *update; |
82 MboxHeaderContext ctx; | |
127 time_t internal_date; | 83 time_t internal_date; |
128 char location[MAX_INT_STRLEN]; | 84 off_t start_offset, stop_offset, old_size; |
129 unsigned int i; | 85 unsigned char *data, md5_digest[16]; |
130 | 86 unsigned int size, pos, virtual_size; |
131 internal_date = from_line_parse_date(msg, physical_size); | 87 const char *location; |
88 | |
89 /* get the From-line */ | |
90 pos = 0; | |
91 while (io_buffer_read_data(inbuf, &data, &size, pos) >= 0) { | |
92 for (; pos < size; pos++) { | |
93 if (data[pos] == '\n') | |
94 break; | |
95 } | |
96 | |
97 if (pos < size) | |
98 break; | |
99 } | |
100 | |
101 if (pos == size || size <= 5 || strncmp(data, "From ", 5) != 0) { | |
102 /* a) no \n found, or line too long | |
103 b) not a From-line */ | |
104 index_set_error(index, "Error indexing mbox file %s: " | |
105 "From-line not found where expected", | |
106 index->mbox_path); | |
107 index->set_flags |= MAIL_INDEX_FLAG_FSCK; | |
108 return FALSE; | |
109 } | |
110 | |
111 /* parse the From-line */ | |
112 internal_date = mbox_from_parse_date(data, size); | |
132 if (internal_date <= 0) | 113 if (internal_date <= 0) |
133 internal_date = ioloop_time; | 114 internal_date = ioloop_time; |
134 | 115 |
135 /* skip the From-line */ | 116 io_buffer_skip(inbuf, pos+1); |
136 for (i = 0; i < physical_size; i++) { | 117 start_offset = inbuf->offset; |
137 if (msg[i] == '\n') { | 118 |
138 i++; | 119 /* now, find the ending "[\r]\nFrom " */ |
139 break; | 120 mbox_read_message(inbuf, &virtual_size); |
140 } | 121 stop_offset = inbuf->offset; |
141 } | 122 |
142 | 123 /* add message to index */ |
143 if (i == physical_size) | |
144 return FALSE; | |
145 | |
146 msg += i; | |
147 offset += i; | |
148 physical_size -= i; | |
149 virtual_size -= i; | |
150 if (i > 0 && msg[i-1] != '\r') | |
151 virtual_size--; | |
152 | |
153 rec = mail_index_record_append(index, internal_date, virtual_size); | 124 rec = mail_index_record_append(index, internal_date, virtual_size); |
154 if (rec == NULL) | 125 if (rec == NULL) |
155 return FALSE; | 126 return FALSE; |
156 | 127 |
157 update = index->update_begin(index, rec); | 128 update = index->update_begin(index, rec); |
158 | 129 |
159 /* location = offset to beginning of message */ | 130 /* location = offset to beginning of message */ |
160 i_snprintf(location, sizeof(location), "%lu", (unsigned long) offset); | 131 location = binary_to_hex((unsigned char *) &start_offset, |
132 sizeof(start_offset)); | |
161 index->update_field(update, FIELD_TYPE_LOCATION, location, 0); | 133 index->update_field(update, FIELD_TYPE_LOCATION, location, 0); |
162 | 134 |
163 /* parse the header and cache wanted fields */ | 135 /* parse the header and cache wanted fields. get the message flags |
164 mail_index_update_headers(update, msg, physical_size, header_func, rec); | 136 from Status and X-Status fields. temporarily limit the buffer size |
137 so the message body is parsed properly (FIXME: does this have | |
138 side effects?) */ | |
139 mbox_header_init_context(&ctx); | |
140 | |
141 old_size = inbuf->size; | |
142 inbuf->size = stop_offset; | |
143 io_buffer_seek(inbuf, start_offset); | |
144 | |
145 mail_index_update_headers(update, inbuf, mbox_header_func, &ctx); | |
146 | |
147 inbuf->size = old_size; | |
148 io_buffer_seek(inbuf, stop_offset); | |
149 | |
150 /* save message flags */ | |
151 rec->msg_flags |= ctx.flags; | |
152 | |
153 /* save MD5 */ | |
154 md5_final(&ctx.md5, md5_digest); | |
155 index->update_field(update, FIELD_TYPE_MD5, | |
156 binary_to_hex(md5_digest, sizeof(md5_digest)), 0); | |
165 | 157 |
166 if (!index->update_end(update)) { | 158 if (!index->update_end(update)) { |
167 /* failed - delete the record */ | 159 /* failed - delete the record */ |
168 (void)index->expunge(index, rec, 0, FALSE); | 160 (void)index->expunge(index, rec, 0, FALSE); |
169 return FALSE; | 161 return FALSE; |
170 } | 162 } |
171 | 163 |
172 return TRUE; | 164 return TRUE; |
173 } | 165 } |
174 | 166 |
175 int mbox_index_append_mmaped(MailIndex *index, const char *data, | 167 int mbox_index_append(MailIndex *index, IOBuffer *inbuf) |
176 size_t data_size, off_t start_offset) | 168 { |
177 { | 169 if (inbuf->offset == inbuf->size) { |
178 const char *data_start, *data_end, *start, *cr; | |
179 size_t size, vsize; | |
180 off_t pos; | |
181 int missing_cr_count; | |
182 | |
183 /* we should start with "From ". if we don't, something's messed up | |
184 and we should check the whole file instead. */ | |
185 if (strncmp(data, "From ", 5) != 0) { | |
186 index->set_flags |= MAIL_INDEX_FLAG_FSCK; | |
187 return FALSE; | |
188 } | |
189 | |
190 /* each message ends at "\nFrom ". first get the size of the message, | |
191 then parse it. calculate the missing CR count as well. */ | |
192 start = data; cr = NULL; missing_cr_count = 0; | |
193 | |
194 data_start = data; | |
195 data_end = data + data_size; | |
196 for (; data != data_end; data++) { | |
197 if (*data == '\r') | |
198 cr = data; | |
199 else if (*data == '\n') { | |
200 if (cr != data-1) | |
201 missing_cr_count++; | |
202 | |
203 if (data+6 < data_end && data[1] == 'F' && | |
204 data[2] == 'r' && data[3] == 'o' && | |
205 data[4] == 'm' && data[5] == ' ') { | |
206 /* end of message */ | |
207 pos = (off_t) (start - data_start) + | |
208 start_offset; | |
209 size = (size_t) (data - start) + 1; | |
210 vsize = size + missing_cr_count; | |
211 if (!mbox_index_append_data(index, start, pos, | |
212 size, vsize)) | |
213 return FALSE; | |
214 | |
215 missing_cr_count = 0; | |
216 start = data+1; | |
217 } | |
218 } | |
219 } | |
220 | |
221 /* last message */ | |
222 pos = (off_t) (start - data_start); | |
223 size = (size_t) (data - start); | |
224 vsize = size + missing_cr_count; | |
225 return mbox_index_append_data(index, start, pos, size, vsize); | |
226 } | |
227 | |
228 int mbox_index_append(MailIndex *index, int fd, const char *path) | |
229 { | |
230 void *mmap_base; | |
231 size_t mmap_length; | |
232 off_t pos, end_pos; | |
233 int ret; | |
234 | |
235 /* get our current position */ | |
236 pos = lseek(fd, 0, SEEK_CUR); | |
237 | |
238 /* get the size of the file */ | |
239 end_pos = lseek(fd, 0, SEEK_END); | |
240 | |
241 if (pos == -1 || end_pos == -1) { | |
242 index_set_error(index, "lseek() failed with mbox file %s: %m", | |
243 path); | |
244 return FALSE; | |
245 } | |
246 | |
247 if (pos == end_pos) { | |
248 /* no new data */ | 170 /* no new data */ |
249 return TRUE; | 171 return TRUE; |
250 } | 172 } |
251 | 173 |
252 if (!index->set_lock(index, MAIL_LOCK_EXCLUSIVE)) | 174 if (!index->set_lock(index, MAIL_LOCK_EXCLUSIVE)) |
253 return FALSE; | 175 return FALSE; |
254 | 176 |
255 /* mmap() the file */ | 177 for (;;) { |
256 mmap_length = end_pos-pos; | 178 if (inbuf->offset != 0) { |
257 mmap_base = mmap(NULL, mmap_length, PROT_READ, MAP_SHARED, fd, pos); | 179 /* we're at the [\r]\n before the From-line, |
258 if (mmap_base == MAP_FAILED) { | 180 skip it */ |
259 index_set_error(index, "mmap() failed with mbox file %s: %m", | 181 if (!mbox_skip_crlf(inbuf)) { |
260 path); | 182 index_set_error(index, |
261 return FALSE; | 183 "Error indexing mbox file %s: " |
262 } | 184 "LF not found where expected", |
263 | 185 index->mbox_path); |
264 (void)madvise(mmap_base, mmap_length, MADV_SEQUENTIAL); | 186 |
265 | 187 index->set_flags |= MAIL_INDEX_FLAG_FSCK; |
266 ret = mbox_index_append_mmaped(index, mmap_base, mmap_length, pos); | 188 return FALSE; |
267 (void)munmap(mmap_base, mmap_length); | 189 } |
268 return ret; | 190 } |
269 } | 191 |
192 if (inbuf->offset == inbuf->size) | |
193 break; | |
194 | |
195 if (!mbox_index_append_next(index, inbuf)) | |
196 return FALSE; | |
197 } | |
198 | |
199 return TRUE; | |
200 } |