Mercurial > dovecot > original-hg > dovecot-1.2
changeset 2146:3a33250e6a2d HEAD
Complain if file isn't in mbox format. Complain if From-line wasn't found
at the expected location. Parser should now correctly handle any kind of mbox,
no matter how corrupted.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 16 Jun 2004 08:38:23 +0300 |
parents | fd00d1b2fd14 |
children | 8a3e09139b4c |
files | src/lib-storage/index/mbox/istream-raw-mbox.c src/lib-storage/index/mbox/istream-raw-mbox.h src/lib-storage/index/mbox/mbox-mail.c src/lib-storage/index/mbox/mbox-sync-rewrite.c src/lib-storage/index/mbox/mbox-sync-update.c src/lib-storage/index/mbox/mbox-sync.c |
diffstat | 6 files changed, 172 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-storage/index/mbox/istream-raw-mbox.c Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/istream-raw-mbox.c Wed Jun 16 08:38:23 2004 +0300 @@ -14,6 +14,11 @@ uoff_t from_offset, hdr_offset, body_offset, mail_size; struct istream *input; + uoff_t input_peak_offset; + + unsigned int corrupted:1; + unsigned int eom:1; + unsigned int next_eof:1; }; static void _close(struct _iostream *stream __attr_unused__) @@ -93,6 +98,7 @@ /* we'll skip over From-line */ rstream->istream.istream.v_offset += line_pos+1; + i_stream_skip(rstream->input, line_pos+1); rstream->hdr_offset = rstream->istream.istream.v_offset; return 0; } @@ -105,8 +111,19 @@ const char *fromp; char *sender, eoh_char; time_t received_time; - size_t i, pos, new_pos; - ssize_t ret; + size_t i, pos, new_pos, from_start_pos; + ssize_t ret = 0; + + if (rstream->eom) { + if (rstream->body_offset == (uoff_t)-1) { + /* missing \n from headers */ + rstream->body_offset = + stream->istream.v_offset + + (stream->pos - stream->skip); + } + stream->istream.eof = rstream->next_eof; + return -1; + } i_stream_seek(rstream->input, stream->istream.v_offset); @@ -117,90 +134,90 @@ do { ret = i_stream_read(rstream->input); buf = i_stream_get_data(rstream->input, &pos); - } while (ret > 0 && pos <= 6); + } while (ret > 0 && + stream->istream.v_offset + pos <= rstream->input_peak_offset); + + if (ret < 0) { + if (ret == -2) + return -2; + + /* we've read the whole file, final byte should be + the \n trailer */ + if (pos > 0 && buf[pos-1] == '\n') + pos--; - if (pos == 0 || (pos == 1 && buf[0] == '\n')) { - /* EOF */ - stream->pos = 0; - stream->istream.eof = TRUE; - rstream->mail_size = stream->istream.v_offset - + i_assert(pos >= stream->pos); + ret = pos == stream->pos ? 
-1 : + (ssize_t)(pos - stream->pos); + + stream->buffer = buf; + stream->pos = pos; + + rstream->eom = TRUE; + rstream->next_eof = TRUE; + rstream->mail_size = stream->istream.v_offset + pos - rstream->hdr_offset; - return -1; + return ret < 0 ? _read(stream) : ret; } if (stream->istream.v_offset == rstream->from_offset) { + /* beginning of message, we haven't yet read our From-line */ if (mbox_read_from_line(rstream) < 0) { stream->pos = 0; stream->istream.eof = TRUE; + rstream->corrupted = TRUE; return -1; } - return _read(stream); - } - i = 0; - - if (pos >= 31) { - if (memcmp(buf, "\nFrom ", 6) == 0) { - if (mbox_from_parse(buf+6, pos-6, - &received_time, &sender) == 0) { - rstream->next_received_time = received_time; - rstream->mail_size = stream->istream.v_offset - - rstream->hdr_offset; - - i_free(rstream->next_sender); - rstream->next_sender = sender; - i_assert(stream->pos == 0); - return -1; - } - - /* we don't want to get stuck at invalid From-line */ - i += 6; - } - } else if (ret == -1) { - /* last few bytes, can't contain From-line */ - if (buf[pos-1] == '\n') { - /* last LF doesn't belong to last message */ - pos--; - } - - if (rstream->body_offset == (uoff_t)-1) { - /* find body_offset */ - for (; i < pos; i++) { - if (buf[i] == '\n' && i > 0 && - buf[i-1] == '\n') { - rstream->body_offset = - stream->istream.v_offset + - i + 1; - break; - } - } - } - - ret = pos <= stream->pos ? -1 : - (ssize_t) (pos - stream->pos); - - rstream->mail_size = stream->istream.v_offset + pos - - rstream->hdr_offset; - - stream->buffer = buf; - stream->pos = pos; - stream->istream.eof = ret == -1; - return ret; + /* got it. we don't want to return it however, + so start again from headers */ + buf = i_stream_get_data(rstream->input, &pos); + if (pos == 0) + return _read(stream); } /* See if we have From-line here - note that it works right only because all characters are different in mbox_from. 
*/ + fromp = mbox_from; from_start_pos = 0; eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : '\0'; - for (fromp = mbox_from; i < pos; i++) { + for (i = 0; i < pos; i++) { if (buf[i] == eoh_char && i > 0 && buf[i-1] == '\n') { rstream->body_offset = stream->istream.v_offset + i + 1; eoh_char = '\0'; } if (buf[i] == *fromp) { if (*++fromp == '\0') { - /* potential From-line - stop here */ + /* potential From-line, see if we have the + rest of the line buffered. + FIXME: if From-line is longer than input + buffer, we break. probably irrelevant.. */ i++; - break; + from_start_pos = i; + fromp = mbox_from; + } else if (from_start_pos != 0) { + /* we have the whole From-line here now. + See if it's a valid one. */ + if (mbox_from_parse(buf + from_start_pos, + pos - from_start_pos, + &received_time, + &sender) == 0) { + /* yep, we stop here. */ + rstream->next_received_time = + received_time; + i_free(rstream->next_sender); + rstream->next_sender = sender; + rstream->eom = TRUE; + + /* rewind "\nFrom " */ + from_start_pos -= 6; + + rstream->mail_size = + stream->istream.v_offset + + from_start_pos - + rstream->hdr_offset; + break; + } + from_start_pos = 0; } } else { fromp = mbox_from; @@ -208,27 +225,36 @@ fromp++; } } - new_pos = i - (fromp - mbox_from); + + /* we want to go at least one byte further next time */ + rstream->input_peak_offset = stream->istream.v_offset + i; - ret = new_pos <= stream->pos ? 
-1 : - (ssize_t) (pos - stream->pos); + if (from_start_pos != 0) { + /* we're waiting for the \n at the end of From-line */ + new_pos = from_start_pos; + } else { + /* leave out the beginnings of potential From-line */ + new_pos = i - (fromp - mbox_from); + } + i_assert(new_pos > stream->pos); + ret = new_pos - stream->pos; + stream->buffer = buf; stream->pos = new_pos; - - if (i < pos) { - /* beginning from From-line, try again - FIXME: loops forever if we don't skip forward */ - ret = 0; - } - return ret; } static void _seek(struct _istream *stream, uoff_t v_offset) { + struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream; + stream->istream.v_offset = v_offset; stream->skip = stream->pos = 0; stream->buffer = NULL; + + rstream->input_peak_offset = 0; + rstream->eom = FALSE; + rstream->next_eof = FALSE; } struct istream *i_stream_create_raw_mbox(pool_t pool, struct istream *input) @@ -322,7 +348,7 @@ offset = stream->v_offset; i_stream_seek(stream, rstream->hdr_offset); while (rstream->body_offset == (uoff_t)-1) { - i_stream_get_data(rstream->input, &pos); + i_stream_get_data(stream, &pos); i_stream_skip(stream, pos); if (_read(&rstream->istream) < 0) @@ -409,22 +435,33 @@ if (stream->v_offset != rstream->from_offset) i_stream_seek(stream, rstream->from_offset); i_stream_seek(rstream->input, rstream->from_offset); + + rstream->input_peak_offset = 0; + rstream->eom = FALSE; + rstream->next_eof = FALSE; } -void istream_raw_mbox_seek(struct istream *stream, uoff_t offset) +int istream_raw_mbox_seek(struct istream *stream, uoff_t offset) { struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream->real_stream; + int check; + + rstream->corrupted = FALSE; + rstream->eom = FALSE; + rstream->next_eof = FALSE; + rstream->input_peak_offset = 0; if (rstream->mail_size != (uoff_t)-1 && rstream->hdr_offset + rstream->mail_size == offset) { istream_raw_mbox_next(stream, (uoff_t)-1); - return; + return 0; } if (offset == rstream->from_offset) { /* 
back to beginning of current message */ offset = rstream->hdr_offset; + check = offset == 0; } else { rstream->body_offset = (uoff_t)-1; rstream->mail_size = (uoff_t)-1; @@ -438,10 +475,15 @@ rstream->from_offset = offset; rstream->hdr_offset = offset; + check = TRUE; } i_stream_seek(stream, offset); i_stream_seek(rstream->input, offset); + + if (check) + (void)_read(&rstream->istream); + return rstream->corrupted ? -1 : 0; } void istream_raw_mbox_flush(struct istream *stream)
--- a/src/lib-storage/index/mbox/istream-raw-mbox.h Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/istream-raw-mbox.h Wed Jun 16 08:38:23 2004 +0300 @@ -29,8 +29,9 @@ void istream_raw_mbox_next(struct istream *stream, uoff_t body_size); /* Seek to message at given offset. offset must point to beginning of - "\nFrom ", or 0 for beginning of file. */ -void istream_raw_mbox_seek(struct istream *stream, uoff_t offset); + "\nFrom ", or 0 for beginning of file. Returns -1 if it offset doesn't + contain a valid From-line. */ +int istream_raw_mbox_seek(struct istream *stream, uoff_t offset); /* Flush all buffering. Call if you modify the mbox. */ void istream_raw_mbox_flush(struct istream *stream);
--- a/src/lib-storage/index/mbox/mbox-mail.c Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-mail.c Wed Jun 16 08:38:23 2004 +0300 @@ -16,6 +16,7 @@ { struct index_mailbox *ibox = mail->ibox; const void *data; + uint64_t offset; if (ibox->mbox_lock_type == F_UNLCK) { if (mbox_sync(ibox, FALSE, TRUE) < 0) @@ -34,7 +35,14 @@ return -1; } - istream_raw_mbox_seek(ibox->mbox_stream, *((const uint64_t *)data)); + offset = *((const uint64_t *)data); + if (istream_raw_mbox_seek(ibox->mbox_stream, offset) < 0) { + mail_storage_set_critical(ibox->box.storage, + "Cached message offset %s is invalid for mbox file %s", + dec2str(offset), ibox->path); + mail_index_mark_corrupted(ibox->index); + return -1; + } return 0; }
--- a/src/lib-storage/index/mbox/mbox-sync-rewrite.c Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-sync-rewrite.c Wed Jun 16 08:38:23 2004 +0300 @@ -54,6 +54,8 @@ const unsigned char *data; void *p; + i_assert(size < SSIZE_T_MAX); + /* Append at the end of X-Keywords header, or X-UID if it doesn't exist */ pos = ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] != (size_t)-1 ? @@ -216,7 +218,7 @@ uint32_t old_prev_msg_uid; uoff_t offset; - i_stream_seek(sync_ctx->file_input, mails[idx].offset); + i_stream_seek(sync_ctx->input, mails[idx].offset); memset(&mail_ctx, 0, sizeof(mail_ctx)); mail_ctx.sync_ctx = sync_ctx; @@ -228,10 +230,10 @@ /* mbox_sync_parse_next_mail() checks that UIDs are growing, so we have to fool it. */ - old_prev_msg_uid = sync_ctx->prev_msg_uid; - sync_ctx->prev_msg_uid = mails[idx].uid-1; + old_prev_msg_uid = sync_ctx->prev_msg_uid; + sync_ctx->prev_msg_uid = mails[idx].uid-1; - mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE); + mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE); if (mails[idx].space != 0) mbox_sync_update_header_from(&mail_ctx, &mails[idx]); else { @@ -242,7 +244,7 @@ } i_assert(mail_ctx.mail.space == mails[idx].space); - sync_ctx->prev_msg_uid = old_prev_msg_uid; + sync_ctx->prev_msg_uid = old_prev_msg_uid; if (mail_ctx.mail.space <= 0) mbox_sync_headers_add_space(&mail_ctx, extra_per_mail); @@ -257,7 +259,7 @@ /* now we have to move it. first move the body of the message, then write the header and leave the extra space to beginning of headers. 
*/ - offset = sync_ctx->file_input->v_offset; + offset = sync_ctx->input->v_offset; if (mbox_move(sync_ctx, offset + mails[idx+1].space, offset, *end_offset - offset - mails[idx+1].space) < 0) return -1; @@ -284,7 +286,8 @@ struct mbox_sync_mail_context mail_ctx; uint32_t old_prev_msg_uid; - i_stream_seek(sync_ctx->file_input, mails[idx].offset); + i_assert(start_offset < end_offset); + i_stream_seek(sync_ctx->input, mails[idx].offset); memset(&mail_ctx, 0, sizeof(mail_ctx)); mail_ctx.sync_ctx = sync_ctx; @@ -299,12 +302,12 @@ old_prev_msg_uid = sync_ctx->prev_msg_uid; sync_ctx->prev_msg_uid = mails[idx].uid-1; - mbox_sync_parse_next_mail(sync_ctx->file_input, &mail_ctx, TRUE); + mbox_sync_parse_next_mail(sync_ctx->input, &mail_ctx, TRUE); mbox_sync_update_header_from(&mail_ctx, &mails[idx]); sync_ctx->prev_msg_uid = old_prev_msg_uid; - mbox_sync_headers_add_space(&mail_ctx, end_offset - start_offset); + mbox_sync_headers_add_space(&mail_ctx,end_offset - start_offset); if (pwrite_full(sync_ctx->fd, str_data(mail_ctx.header), str_len(mail_ctx.header), start_offset) < 0) {
--- a/src/lib-storage/index/mbox/mbox-sync-update.c Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-sync-update.c Wed Jun 16 08:38:23 2004 +0300 @@ -105,6 +105,12 @@ old_hdr_size = ctx->body_offset - ctx->hdr_offset; new_hdr_size = str_len(ctx->header); + if (new_hdr_size > 0 && + str_data(ctx->header)[new_hdr_size-1] != '\n') { + /* broken header - doesn't end with \n. fix it. */ + str_append_c(ctx->header, '\n'); + } + if (ctx->mail.uid == ctx->sync_ctx->first_uid && ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] == (size_t)-1) { if (ctx->sync_ctx->base_uid_validity == 0) {
--- a/src/lib-storage/index/mbox/mbox-sync.c Wed Jun 16 08:36:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-sync.c Wed Jun 16 08:38:23 2004 +0300 @@ -112,8 +112,13 @@ /* same as before. we'll have to fix mbox stream to contain correct from_offset, hdr_offset and body_offset. so, seek to from_offset and read through the header. */ - istream_raw_mbox_seek(sync_ctx->input, old_from_offset); - (void)istream_raw_mbox_get_body_offset(sync_ctx->input); + if (istream_raw_mbox_seek(sync_ctx->input, old_from_offset) < 0) { + mail_storage_set_critical(ibox->box.storage, + "Message offset %s changed unexpectedly for mbox file " + "%s", dec2str(old_from_offset), sync_ctx->ibox->path); + return 0; + } + (void)istream_raw_mbox_get_body_offset(sync_ctx->input); i_stream_seek(sync_ctx->input, old_offset); return 1; } @@ -299,8 +304,8 @@ if (rec != NULL && rec->uid != uid) { /* new UID in the middle of the mailbox - shouldn't happen */ mail_storage_set_critical(sync_ctx->ibox->box.storage, - "mbox sync: UID inserted in the middle of mailbox " - "(%u > %u)", rec->uid, uid); + "mbox sync: UID inserted in the middle of mailbox %s " + "(%u > %u)", sync_ctx->ibox->path, rec->uid, uid); mail_index_mark_corrupted(sync_ctx->ibox->index); return -1; } @@ -620,7 +625,13 @@ /* set to -1, since they're always increased later */ sync_ctx->seq = sync_ctx->idx_seq = seq-1; - istream_raw_mbox_seek(sync_ctx->input, offset); + if (istream_raw_mbox_seek(sync_ctx->input, offset) < 0) { + mail_storage_set_critical(sync_ctx->ibox->box.storage, + "Cached message offset %s is invalid for mbox file %s", + dec2str(offset), sync_ctx->ibox->path); + mail_index_mark_corrupted(sync_ctx->ibox->index); + return -1; + } (void)istream_raw_mbox_get_body_offset(sync_ctx->input); return 0; } @@ -634,9 +645,14 @@ uoff_t offset; int ret, expunged; - if (min_message_count != 0) - istream_raw_mbox_seek(sync_ctx->input, 0); - else { + if (min_message_count != 0) { + if (istream_raw_mbox_seek(sync_ctx->input, 0) < 0) { 
+ /* doesn't begin with a From-line */ + mail_storage_set_error(sync_ctx->ibox->box.storage, + "Mailbox isn't a valid mbox file"); + return -1; + } + } else { /* we sync only what we need to. jump to first record that needs updating */ if (sync_ctx->sync_rec.uid1 == 0) {