comparison src/lib-index/mbox/mbox-append.c @ 22:a946ce1f09b7 HEAD

mbox fixes, not fully working yet but almost :)
author Timo Sirainen <tss@iki.fi>
date Sat, 24 Aug 2002 05:04:45 +0300
parents 82b7de533f98
children 55e09f36d23d
comparison
equal deleted inserted replaced
21:163675942b83 22:a946ce1f09b7
1 /* Copyright (C) 2002 Timo Sirainen */ 1 /* Copyright (C) 2002 Timo Sirainen */
2 2
3 #include "lib.h" 3 #include "lib.h"
4 #include "mmap-util.h"
5 #include "ioloop.h" 4 #include "ioloop.h"
5 #include "iobuffer.h"
6 #include "hex-binary.h"
7 #include "md5.h"
6 #include "mbox-index.h" 8 #include "mbox-index.h"
7 #include "mail-index-util.h" 9 #include "mail-index-util.h"
8
9 #include <time.h>
10 #include <ctype.h>
11 #include <unistd.h>
12 #include <sys/mman.h>
13
14 static const char *months[] = {
15 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
16 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
17 };
18 10
19 static MailIndexRecord * 11 static MailIndexRecord *
20 mail_index_record_append(MailIndex *index, time_t internal_date, 12 mail_index_record_append(MailIndex *index, time_t internal_date,
21 size_t full_virtual_size) 13 size_t full_virtual_size)
22 { 14 {
31 return NULL; 23 return NULL;
32 24
33 return rec; 25 return rec;
34 } 26 }
35 27
36 static time_t from_line_parse_date(const char *msg, size_t size) 28 static void mbox_read_message(IOBuffer *inbuf, unsigned int *virtual_size)
37 { 29 {
38 const char *msg_end; 30 unsigned char *msg;
39 struct tm tm; 31 unsigned int i, size, startpos, vsize;
40 int i; 32
41 33 /* read until "[\r]\nFrom " is found */
42 /* From <sender> <date> <moreinfo> */ 34 startpos = 0; vsize = 0;
43 if (strncmp(msg, "From ", 5) != 0) 35 while (io_buffer_read_data(inbuf, &msg, &size, startpos) >= 0) {
44 return 0; 36 for (i = startpos; i < size; i++) {
45 37 if (msg[i] == '\n') {
46 msg_end = msg + size; 38 if (i == 0 || msg[i-1] != '\r') {
47 39 /* missing CR */
48 /* skip sender */ 40 vsize++;
49 msg += 5; 41 }
50 while (*msg != ' ' && msg < msg_end) msg++; 42 } else if (msg[i] == ' ' && i >= 5) {
51 while (*msg == ' ' && msg < msg_end) msg++; 43 /* See if it's space after "From" */
52 44 if (msg[i-5] == '\n' && msg[i-4] == 'F' &&
53 /* next 24 chars are the date in asctime() format, 45 msg[i-3] == 'r' && msg[i-2] == 'o' &&
54 eg. "Thu Nov 29 22:33:52 2001" */ 46 msg[i-1] == 'm') {
55 if (msg+24 > msg_end) 47 /* yes, see if we had \r too */
56 return 0; 48 i -= 5;
57 49 if (i > 0 && msg[i-1] == '\r')
58 memset(&tm, 0, sizeof(tm)); 50 i--;
59 51 else
60 /* skip weekday */ 52 vsize--;
61 msg += 4; 53 break;
62 54 }
63 /* month */ 55 }
64 for (i = 0; i < 12; i++) { 56 }
65 if (strncasecmp(months[i], msg, 3) == 0) { 57
66 tm.tm_mon = i; 58 if (i < size) {
59 startpos = i;
67 break; 60 break;
68 } 61 }
69 } 62
70 63 if (i > 0) {
71 if (i == 12 || msg[3] != ' ') 64 startpos = i < 7 ? i : 7;
72 return 0; 65 i -= startpos;
73 msg += 4; 66
74 67 io_buffer_skip(inbuf, i);
75 /* day */ 68 vsize += i;
76 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ') 69 }
77 return 0; 70 }
78 tm.tm_mday = (msg[0]-'0') * 10 + (msg[1]-'0'); 71
79 msg += 3; 72 io_buffer_skip(inbuf, startpos);
80 73 vsize += startpos;
81 /* hour */ 74
82 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':') 75 *virtual_size = vsize;
83 return 0; 76 }
84 tm.tm_hour = (msg[0]-'0') * 10 + (msg[1]-'0'); 77
85 msg += 3; 78 static int mbox_index_append_next(MailIndex *index, IOBuffer *inbuf)
86
87 /* minute */
88 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ':')
89 return 0;
90 tm.tm_min = (msg[0]-'0') * 10 + (msg[1]-'0');
91 msg += 3;
92
93 /* second */
94 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) || msg[2] != ' ')
95 return 0;
96 tm.tm_sec = (msg[0]-'0') * 10 + (msg[1]-'0');
97 msg += 3;
98
99 /* year */
100 if (!i_isdigit(msg[0]) || !i_isdigit(msg[1]) ||
101 !i_isdigit(msg[2]) || !i_isdigit(msg[3]))
102 return 0;
103 tm.tm_year = (msg[0]-'0') * 1000 + (msg[1]-'0') * 100 +
104 (msg[2]-'0') * 10 + (msg[3]-'0') - 1900;
105
106 tm.tm_isdst = -1;
107 return mktime(&tm);
108 }
109
110 static void header_func(MessagePart *part __attr_unused__,
111 const char *name, unsigned int name_len,
112 const char *value, unsigned int value_len,
113 void *context)
114 {
115 MailIndexRecord *rec = context;
116
117 rec->msg_flags |= mbox_header_get_flags(name, name_len,
118 value, value_len);
119 }
120
121 static int mbox_index_append_data(MailIndex *index, const char *msg,
122 off_t offset, size_t physical_size,
123 size_t virtual_size)
124 { 79 {
125 MailIndexRecord *rec; 80 MailIndexRecord *rec;
126 MailIndexUpdate *update; 81 MailIndexUpdate *update;
82 MboxHeaderContext ctx;
127 time_t internal_date; 83 time_t internal_date;
128 char location[MAX_INT_STRLEN]; 84 off_t start_offset, stop_offset, old_size;
129 unsigned int i; 85 unsigned char *data, md5_digest[16];
130 86 unsigned int size, pos, virtual_size;
131 internal_date = from_line_parse_date(msg, physical_size); 87 const char *location;
88
89 /* get the From-line */
90 pos = 0;
91 while (io_buffer_read_data(inbuf, &data, &size, pos) >= 0) {
92 for (; pos < size; pos++) {
93 if (data[pos] == '\n')
94 break;
95 }
96
97 if (pos < size)
98 break;
99 }
100
101 if (pos == size || size <= 5 || strncmp(data, "From ", 5) != 0) {
102 /* a) no \n found, or line too long
103 b) not a From-line */
104 index_set_error(index, "Error indexing mbox file %s: "
105 "From-line not found where expected",
106 index->mbox_path);
107 index->set_flags |= MAIL_INDEX_FLAG_FSCK;
108 return FALSE;
109 }
110
111 /* parse the From-line */
112 internal_date = mbox_from_parse_date(data, size);
132 if (internal_date <= 0) 113 if (internal_date <= 0)
133 internal_date = ioloop_time; 114 internal_date = ioloop_time;
134 115
135 /* skip the From-line */ 116 io_buffer_skip(inbuf, pos+1);
136 for (i = 0; i < physical_size; i++) { 117 start_offset = inbuf->offset;
137 if (msg[i] == '\n') { 118
138 i++; 119 /* now, find the ending "[\r]\nFrom " */
139 break; 120 mbox_read_message(inbuf, &virtual_size);
140 } 121 stop_offset = inbuf->offset;
141 } 122
142 123 /* add message to index */
143 if (i == physical_size)
144 return FALSE;
145
146 msg += i;
147 offset += i;
148 physical_size -= i;
149 virtual_size -= i;
150 if (i > 0 && msg[i-1] != '\r')
151 virtual_size--;
152
153 rec = mail_index_record_append(index, internal_date, virtual_size); 124 rec = mail_index_record_append(index, internal_date, virtual_size);
154 if (rec == NULL) 125 if (rec == NULL)
155 return FALSE; 126 return FALSE;
156 127
157 update = index->update_begin(index, rec); 128 update = index->update_begin(index, rec);
158 129
159 /* location = offset to beginning of message */ 130 /* location = offset to beginning of message */
160 i_snprintf(location, sizeof(location), "%lu", (unsigned long) offset); 131 location = binary_to_hex((unsigned char *) &start_offset,
132 sizeof(start_offset));
161 index->update_field(update, FIELD_TYPE_LOCATION, location, 0); 133 index->update_field(update, FIELD_TYPE_LOCATION, location, 0);
162 134
163 /* parse the header and cache wanted fields */ 135 /* parse the header and cache wanted fields. get the message flags
164 mail_index_update_headers(update, msg, physical_size, header_func, rec); 136 from Status and X-Status fields. temporarily limit the buffer size
137 so the message body is parsed properly (FIXME: does this have
138 side effects?) */
139 mbox_header_init_context(&ctx);
140
141 old_size = inbuf->size;
142 inbuf->size = stop_offset;
143 io_buffer_seek(inbuf, start_offset);
144
145 mail_index_update_headers(update, inbuf, mbox_header_func, &ctx);
146
147 inbuf->size = old_size;
148 io_buffer_seek(inbuf, stop_offset);
149
150 /* save message flags */
151 rec->msg_flags |= ctx.flags;
152
153 /* save MD5 */
154 md5_final(&ctx.md5, md5_digest);
155 index->update_field(update, FIELD_TYPE_MD5,
156 binary_to_hex(md5_digest, sizeof(md5_digest)), 0);
165 157
166 if (!index->update_end(update)) { 158 if (!index->update_end(update)) {
167 /* failed - delete the record */ 159 /* failed - delete the record */
168 (void)index->expunge(index, rec, 0, FALSE); 160 (void)index->expunge(index, rec, 0, FALSE);
169 return FALSE; 161 return FALSE;
170 } 162 }
171 163
172 return TRUE; 164 return TRUE;
173 } 165 }
174 166
175 int mbox_index_append_mmaped(MailIndex *index, const char *data, 167 int mbox_index_append(MailIndex *index, IOBuffer *inbuf)
176 size_t data_size, off_t start_offset) 168 {
177 { 169 if (inbuf->offset == inbuf->size) {
178 const char *data_start, *data_end, *start, *cr;
179 size_t size, vsize;
180 off_t pos;
181 int missing_cr_count;
182
183 /* we should start with "From ". if we don't, something's messed up
184 and we should check the whole file instead. */
185 if (strncmp(data, "From ", 5) != 0) {
186 index->set_flags |= MAIL_INDEX_FLAG_FSCK;
187 return FALSE;
188 }
189
190 /* each message ends at "\nFrom ". first get the size of the message,
191 then parse it. calculate the missing CR count as well. */
192 start = data; cr = NULL; missing_cr_count = 0;
193
194 data_start = data;
195 data_end = data + data_size;
196 for (; data != data_end; data++) {
197 if (*data == '\r')
198 cr = data;
199 else if (*data == '\n') {
200 if (cr != data-1)
201 missing_cr_count++;
202
203 if (data+6 < data_end && data[1] == 'F' &&
204 data[2] == 'r' && data[3] == 'o' &&
205 data[4] == 'm' && data[5] == ' ') {
206 /* end of message */
207 pos = (off_t) (start - data_start) +
208 start_offset;
209 size = (size_t) (data - start) + 1;
210 vsize = size + missing_cr_count;
211 if (!mbox_index_append_data(index, start, pos,
212 size, vsize))
213 return FALSE;
214
215 missing_cr_count = 0;
216 start = data+1;
217 }
218 }
219 }
220
221 /* last message */
222 pos = (off_t) (start - data_start);
223 size = (size_t) (data - start);
224 vsize = size + missing_cr_count;
225 return mbox_index_append_data(index, start, pos, size, vsize);
226 }
227
228 int mbox_index_append(MailIndex *index, int fd, const char *path)
229 {
230 void *mmap_base;
231 size_t mmap_length;
232 off_t pos, end_pos;
233 int ret;
234
235 /* get our current position */
236 pos = lseek(fd, 0, SEEK_CUR);
237
238 /* get the size of the file */
239 end_pos = lseek(fd, 0, SEEK_END);
240
241 if (pos == -1 || end_pos == -1) {
242 index_set_error(index, "lseek() failed with mbox file %s: %m",
243 path);
244 return FALSE;
245 }
246
247 if (pos == end_pos) {
248 /* no new data */ 170 /* no new data */
249 return TRUE; 171 return TRUE;
250 } 172 }
251 173
252 if (!index->set_lock(index, MAIL_LOCK_EXCLUSIVE)) 174 if (!index->set_lock(index, MAIL_LOCK_EXCLUSIVE))
253 return FALSE; 175 return FALSE;
254 176
255 /* mmap() the file */ 177 for (;;) {
256 mmap_length = end_pos-pos; 178 if (inbuf->offset != 0) {
257 mmap_base = mmap(NULL, mmap_length, PROT_READ, MAP_SHARED, fd, pos); 179 /* we're at the [\r]\n before the From-line,
258 if (mmap_base == MAP_FAILED) { 180 skip it */
259 index_set_error(index, "mmap() failed with mbox file %s: %m", 181 if (!mbox_skip_crlf(inbuf)) {
260 path); 182 index_set_error(index,
261 return FALSE; 183 "Error indexing mbox file %s: "
262 } 184 "LF not found where expected",
263 185 index->mbox_path);
264 (void)madvise(mmap_base, mmap_length, MADV_SEQUENTIAL); 186
265 187 index->set_flags |= MAIL_INDEX_FLAG_FSCK;
266 ret = mbox_index_append_mmaped(index, mmap_base, mmap_length, pos); 188 return FALSE;
267 (void)munmap(mmap_base, mmap_length); 189 }
268 return ret; 190 }
269 } 191
192 if (inbuf->offset == inbuf->size)
193 break;
194
195 if (!mbox_index_append_next(index, inbuf))
196 return FALSE;
197 }
198
199 return TRUE;
200 }