Mercurial > dovecot > original-hg > dovecot-1.2
changeset 7479:7871b6219480 HEAD
mbox: Don't stop at From_-lines in the message bodies. Use Content-Length:
header to figure out if it belongs to a message body or not.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 04 May 2008 16:57:58 +0300 |
parents | 0eb6a0c01001 |
children | ad0f32abda6d |
files | src/lib-storage/index/mbox/istream-raw-mbox.c src/lib-storage/index/mbox/istream-raw-mbox.h src/lib-storage/index/mbox/mbox-mail.c |
diffstat | 3 files changed, 133 insertions(+), 67 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-storage/index/mbox/istream-raw-mbox.c Sun May 04 14:35:51 2008 +0300 +++ b/src/lib-storage/index/mbox/istream-raw-mbox.c Sun May 04 16:57:58 2008 +0300 @@ -238,15 +238,19 @@ FIXME: if From-line is longer than input buffer, we break. probably irrelevant.. */ i++; - from_after_pos = i; - from_start_pos = i - 6; - if (from_start_pos > 0 && - buf[from_start_pos-1] == '\r') { - /* CR also belongs to it. */ - crlf_ending = TRUE; - from_start_pos--; - } else { - crlf_ending = FALSE; + if (rstream->hdr_offset + rstream->mail_size == + stream->istream.v_offset + i - 6 || + rstream->mail_size == (uoff_t)-1) { + from_after_pos = i; + from_start_pos = i - 6; + if (from_start_pos > 0 && + buf[from_start_pos-1] == '\r') { + /* CR also belongs to it. */ + crlf_ending = TRUE; + from_start_pos--; + } else { + crlf_ending = FALSE; + } } fromp = mbox_from; } else if (from_start_pos != (size_t)-1) { @@ -290,6 +294,17 @@ new_pos--; } + if (stream->istream.v_offset - + rstream->hdr_offset + new_pos > rstream->mail_size) { + /* istream_raw_mbox_set_next_offset() used invalid + cached next_offset? */ + i_error("Unexpectedly lost From-line at %"PRIuUOFF_T, + rstream->hdr_offset + rstream->mail_size); + rstream->eof = TRUE; + rstream->corrupted = TRUE; + return -1; + } + stream->buffer = buf; if (new_pos == stream->pos) { if (stream->istream.eof || ret > 0) @@ -379,8 +394,7 @@ char *sender; /* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */ - if (i_stream_read_data(rstream->istream.parent, &data, &size, 30) == -1) - return -1; + (void)i_stream_read_data(rstream->istream.parent, &data, &size, 30); if ((size == 1 && data[0] == '\n') || (size == 2 && data[0] == '\r' && data[1] == '\n')) { @@ -469,33 +483,41 @@ return rstream->body_offset; } -uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size) +uoff_t istream_raw_mbox_get_body_size(struct istream *stream, + uoff_t expected_body_size) { struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream->real_stream; const unsigned char *data; size_t size; - uoff_t old_offset; + uoff_t old_offset, body_size; i_assert(rstream->hdr_offset != (uoff_t)-1); i_assert(rstream->body_offset != (uoff_t)-1); - if (rstream->mail_size != (uoff_t)-1) { - return rstream->mail_size - - (rstream->body_offset - rstream->hdr_offset); - } - + body_size = rstream->mail_size == (uoff_t)-1 ? (uoff_t)-1 : + rstream->mail_size - (rstream->body_offset - + rstream->hdr_offset); old_offset = stream->v_offset; - if (body_size != (uoff_t)-1) { + if (expected_body_size != (uoff_t)-1) { + /* if we already have the existing body size, use it as long as + it's >= expected body_size. otherwise the previous parsing + may have stopped at a From_-line that belongs to the body. */ + if (body_size != (uoff_t)-1 && body_size >= expected_body_size) + return body_size; + i_stream_seek(rstream->istream.parent, - rstream->body_offset + body_size); + rstream->body_offset + expected_body_size); if (istream_raw_mbox_is_valid_from(rstream) > 0) { - rstream->mail_size = body_size + + rstream->mail_size = expected_body_size + (rstream->body_offset - rstream->hdr_offset); i_stream_seek(stream, old_offset); - return body_size; + return expected_body_size; } + /* invalid expected_body_size */ } + if (body_size != (uoff_t)-1) + return body_size; /* have to read through the message body */ while (i_stream_read_data(stream, &data, &size, 0) > 0) @@ -535,12 +557,13 @@ return rstream->crlf_ending; } -void istream_raw_mbox_next(struct istream *stream, uoff_t body_size) +void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size) { struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream->real_stream; + uoff_t body_size; - body_size = istream_raw_mbox_get_body_size(stream, body_size); + body_size = istream_raw_mbox_get_body_size(stream, expected_body_size); rstream->mail_size = (uoff_t)-1; rstream->received_time = rstream->next_received_time; @@ -606,6 +629,14 @@ return rstream->corrupted ? -1 : 0; } +void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset) +{ + struct raw_mbox_istream *rstream = + (struct raw_mbox_istream *)stream->real_stream; + + rstream->mail_size = offset - rstream->hdr_offset; +} + bool istream_raw_mbox_is_eof(struct istream *stream) { struct raw_mbox_istream *rstream =
--- a/src/lib-storage/index/mbox/istream-raw-mbox.h Sun May 04 14:35:51 2008 +0300 +++ b/src/lib-storage/index/mbox/istream-raw-mbox.h Sun May 04 16:57:58 2008 +0300 @@ -12,10 +12,11 @@ /* Return offset to beginning of the body. */ uoff_t istream_raw_mbox_get_body_offset(struct istream *stream); -/* Return the number of bytes in the body of this message. If body_size isn't - (uoff_t)-1, we'll use it as potentially valid body size to avoid actually - reading through the whole message. */ -uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size); +/* Return the number of bytes in the body of this message. If + expected_body_size isn't (uoff_t)-1, we'll use it as potentially valid body + size to avoid actually reading through the whole message. */ +uoff_t istream_raw_mbox_get_body_size(struct istream *stream, + uoff_t expected_body_size); /* Return received time of current message, or (time_t)-1 if the timestamp is broken. */ @@ -26,14 +27,18 @@ /* Return TRUE if the empty line between this and the next mail contains CR. */ bool istream_raw_mbox_has_crlf_ending(struct istream *stream); -/* Jump to next message. If body_size isn't (uoff_t)-1, we'll use it as - potentially valid body size. */ -void istream_raw_mbox_next(struct istream *stream, uoff_t body_size); +/* Jump to next message. If expected_body_size isn't (uoff_t)-1, we'll use it + as potentially valid body size. */ +void istream_raw_mbox_next(struct istream *stream, uoff_t expected_body_size); /* Seek to message at given offset. offset must point to beginning of "\nFrom ", or 0 for beginning of file. Returns -1 if it offset doesn't contain a valid From-line. */ int istream_raw_mbox_seek(struct istream *stream, uoff_t offset); +/* Set next message's start offset. If this isn't set, read stops at the next + valid From_-line, even if it belongs to the current message's body + (Content-Length: header can be used to determine that). */ +void istream_raw_mbox_set_next_offset(struct istream *stream, uoff_t offset); /* Returns TRUE if we've read the whole mbox. */ bool istream_raw_mbox_is_eof(struct istream *stream);
--- a/src/lib-storage/index/mbox/mbox-mail.c Sun May 04 14:35:51 2008 +0300 +++ b/src/lib-storage/index/mbox/mbox-mail.c Sun May 04 16:57:58 2008 +0300 @@ -166,12 +166,37 @@ return index_mail_get_special(_mail, field, value_r); } +static bool +mbox_mail_get_next_offset(struct index_mail *mail, uoff_t *next_offset_r) +{ + struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox; + struct mail *_mail = &mail->mail.mail; + const struct mail_index_header *hdr; + + hdr = mail_index_get_header(mail->trans->trans_view); + if (_mail->seq >= hdr->messages_count) { + if (_mail->seq != hdr->messages_count) { + /* we're appending a new message */ + return FALSE; + } + + /* last message, use the synced mbox size */ + int trailer_size; + + trailer_size = (mbox->storage->storage.flags & + MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ? 2 : 1; + *next_offset_r = hdr->sync_size - trailer_size; + return TRUE; + } + return mbox_file_lookup_offset(mbox, mail->trans->trans_view, + _mail->seq + 1, next_offset_r); +} + static int mbox_mail_get_physical_size(struct mail *_mail, uoff_t *size_r) { struct index_mail *mail = (struct index_mail *)_mail; struct index_mail_data *data = &mail->data; struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox; - const struct mail_index_header *hdr; struct istream *input; struct message_size hdr_size; uoff_t old_offset, body_offset, body_size, next_offset; @@ -194,26 +219,10 @@ /* use the next message's offset to avoid reading through the entire message body to find out its size */ - hdr = mail_index_get_header(mail->trans->trans_view); - if (_mail->seq >= hdr->messages_count) { - if (_mail->seq == hdr->messages_count) { - /* last message, use the synced mbox size */ - int trailer_size; - - trailer_size = (mbox->storage->storage.flags & - MAIL_STORAGE_FLAG_SAVE_CRLF) != 0 ? - 2 : 1; - body_size = hdr->sync_size - body_offset - trailer_size; - } else { - /* we're appending a new message */ - body_size = (uoff_t)-1; - } - } else if (mbox_file_lookup_offset(mbox, mail->trans->trans_view, - _mail->seq + 1, &next_offset) > 0) { + if (mbox_mail_get_next_offset(mail, &next_offset)) body_size = next_offset - body_offset; - } else { + else body_size = (uoff_t)-1; - } /* verify that the calculated body size is correct */ body_size = istream_raw_mbox_get_body_size(mbox->mbox_stream, @@ -226,31 +235,52 @@ return 0; } +static int mbox_mail_init_stream(struct index_mail *mail) +{ + struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox; + struct istream *raw_stream; + uoff_t hdr_offset, next_offset; + + if (mbox_mail_seek(mail) < 0) + return -1; + + if (!mbox_mail_get_next_offset(mail, &next_offset)) { + if (mbox_mail_seek(mail) < 0) + return -1; + if (!mbox_mail_get_next_offset(mail, &next_offset)) { + i_warning("mbox %s: Can't find next message offset", + mbox->path); + next_offset = (uoff_t)-1; + } + } + + raw_stream = mbox->mbox_stream; + hdr_offset = istream_raw_mbox_get_header_offset(raw_stream); + i_stream_seek(raw_stream, hdr_offset); + + if (next_offset != (uoff_t)-1) + istream_raw_mbox_set_next_offset(raw_stream, next_offset); + + raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1); + mail->data.stream = + i_stream_create_header_filter(raw_stream, + HEADER_FILTER_EXCLUDE | HEADER_FILTER_NO_CR, + mbox_hide_headers, mbox_hide_headers_count, + null_header_filter_callback, NULL); + i_stream_unref(&raw_stream); + return 0; +} + static int mbox_mail_get_stream(struct mail *_mail, struct message_size *hdr_size, struct message_size *body_size, struct istream **stream_r) { struct index_mail *mail = (struct index_mail *)_mail; - struct index_mail_data *data = &mail->data; - struct mbox_mailbox *mbox = (struct mbox_mailbox *)mail->ibox; - struct istream *raw_stream; - uoff_t offset; - if (data->stream == NULL) { - if (mbox_mail_seek(mail) < 0) + if (mail->data.stream == NULL) { + if (mbox_mail_init_stream(mail) < 0) return -1; - - raw_stream = mbox->mbox_stream; - offset = istream_raw_mbox_get_header_offset(raw_stream); - i_stream_seek(raw_stream, offset); - raw_stream = i_stream_create_limit(raw_stream, (uoff_t)-1); - data->stream = - i_stream_create_header_filter(raw_stream, - HEADER_FILTER_EXCLUDE | HEADER_FILTER_NO_CR, - mbox_hide_headers, mbox_hide_headers_count, - null_header_filter_callback, NULL); - i_stream_unref(&raw_stream); } return index_mail_init_stream(mail, hdr_size, body_size, stream_r);