view src/lib-storage/index/mbox/istream-raw-mbox.c @ 6429:65c69a53a7be HEAD

Replaced my Copyright notices. The year range always ends with 2007 now. My name was replaced with "Dovecot authors". In many cases I didn't really even own the copyright, so this is more correct.
author Timo Sirainen <tss@iki.fi>
date Sun, 16 Sep 2007 14:34:22 +0300
parents a8b515e1a26f
children db6afac39fde
line wrap: on
line source

/* Copyright (c) 2003-2007 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "istream-internal.h"
#include "istream-raw-mbox.h"
#include "mbox-from.h"

struct raw_mbox_istream {
	struct istream_private istream;

	time_t received_time, next_received_time;
	char *sender, *next_sender;

	uoff_t from_offset, hdr_offset, body_offset, mail_size;
	struct istream *input;
	uoff_t input_peak_offset;

	unsigned int one_mail_only:1;
	unsigned int crlf_ending:1;
	unsigned int corrupted:1;
	unsigned int eof:1;
};

static void i_stream_raw_mbox_destroy(struct iostream_private *stream)
{
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;

	i_free(rstream->sender);
	i_free(rstream->next_sender);

	i_stream_seek(rstream->input, rstream->istream.istream.v_offset);
	i_stream_unref(&rstream->input);
}

static void
i_stream_raw_mbox_set_max_buffer_size(struct iostream_private *stream,
				      size_t max_size)
{
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;

	i_stream_set_max_buffer_size(rstream->input, max_size);
}

static int mbox_read_from_line(struct raw_mbox_istream *rstream)
{
	const unsigned char *buf, *p;
	char *sender;
	time_t received_time;
	size_t pos, line_pos;
	int skip;

	buf = i_stream_get_data(rstream->input, &pos);
	i_assert(pos > 0);

	/* from_offset points to "\nFrom ", so unless we're at the beginning
	   of the file, skip the initial \n */
	skip = rstream->from_offset != 0;
	if (skip && *buf == '\r')
		skip++;

	while ((p = memchr(buf+skip, '\n', pos-skip)) == NULL) {
		if (i_stream_read(rstream->input) < 0) {
			/* EOF - shouldn't happen */
			return -1;
		}
		buf = i_stream_get_data(rstream->input, &pos);
		i_assert(pos > 0);
	}
	line_pos = (size_t)(p - buf);

	if (rstream->from_offset != 0) {
		buf += skip;
		pos -= skip;
	}

	/* beginning of mbox */
	if (memcmp(buf, "From ", 5) != 0 ||
	    mbox_from_parse(buf+5, pos-5, &received_time, &sender) < 0) {
		/* broken From - should happen only at beginning of
		   file if this isn't a mbox.. */
		return -1;
	}

	if (rstream->istream.istream.v_offset == rstream->from_offset) {
		rstream->received_time = received_time;
		i_free(rstream->sender);
		rstream->sender = sender;
	} else {
		rstream->next_received_time = received_time;
		i_free(rstream->next_sender);
		rstream->next_sender = sender;
	}

	/* we'll skip over From-line */
	rstream->istream.istream.v_offset += line_pos+1;
	i_stream_skip(rstream->input, line_pos+1);
	rstream->hdr_offset = rstream->istream.istream.v_offset;
	return 0;
}

static void handle_end_of_mail(struct raw_mbox_istream *rstream, size_t pos)
{
	rstream->mail_size = rstream->istream.istream.v_offset + pos -
		rstream->hdr_offset;

	if (rstream->hdr_offset + rstream->mail_size < rstream->body_offset) {
		/* a) Header didn't have ending \n
		   b) "headers\n\nFrom ..", the second \n belongs to next
		   message which we didn't know at the time yet.

		   The +2 check is for CR+LF linefeeds */
		uoff_t new_body_offset =
			rstream->hdr_offset + rstream->mail_size;
		i_assert(rstream->body_offset == (uoff_t)-1 ||
			 rstream->body_offset == new_body_offset + 1 ||
			 rstream->body_offset == new_body_offset + 2);
		rstream->body_offset = new_body_offset;
	}
}

static ssize_t i_stream_raw_mbox_read(struct istream_private *stream)
{
	static const char *mbox_from = "\nFrom ";
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
	const unsigned char *buf;
	const char *fromp;
	char *sender;
	time_t received_time;
	size_t i, pos, new_pos, from_start_pos, from_after_pos;
	ssize_t ret = 0;
	int eoh_char;
	bool crlf_ending = FALSE;

	i_assert(stream->istream.v_offset >= rstream->from_offset);

	if (stream->istream.eof)
		return -1;

	i_stream_seek(rstream->input, stream->istream.v_offset);

	stream->pos -= stream->skip;
	stream->skip = 0;
	stream->buffer = NULL;

	ret = 0;
	do {
		buf = i_stream_get_data(rstream->input, &pos);
		if (pos > 1 && stream->istream.v_offset + pos >
		    rstream->input_peak_offset) {
			/* fake our read count. needed because if in the end
			   we have only one character in buffer and we skip it
			   (as potential CR), we want to get back to this
			   i_stream_raw_mbox_read() to read more data. */
			ret = pos;
			break;
		}
		ret = i_stream_read(rstream->input);
	} while (ret > 0);
	stream->istream.stream_errno = rstream->input->stream_errno;

	if (ret < 0) {
		if (ret == -2) {
			if (stream->istream.v_offset + pos ==
			    rstream->input_peak_offset) {
				stream->buffer = buf;
				return -2;
			}
		} else if (stream->istream.v_offset != 0 || pos == 0) {
			/* we've read the whole file, final byte should be
			   the \n trailer */
			if (pos > 0 && buf[pos-1] == '\n') {
				pos--;
				if (pos > 0 && buf[pos-1] == '\r') {
					crlf_ending = TRUE;
					pos--;
				}
			}

			i_assert(pos >= stream->pos);
			ret = pos == stream->pos ? -1 :
				(ssize_t)(pos - stream->pos);

			stream->buffer = buf;
			stream->pos = pos;

			if (stream->istream.v_offset == rstream->from_offset) {
				/* haven't seen From-line yet, so this mbox
				   stream is now at EOF */
				rstream->eof = TRUE;
			}
			stream->istream.eof = TRUE;
			rstream->crlf_ending = crlf_ending;
			handle_end_of_mail(rstream, pos);
			return ret < 0 ? i_stream_raw_mbox_read(stream) : ret;
		}
	}

	if (stream->istream.v_offset == rstream->from_offset) {
		/* beginning of message, we haven't yet read our From-line */
		if (pos == 2 && ret > 0) {
			/* we're at the end of file with CR+LF linefeeds?
			   need more data to verify it. */
			rstream->input_peak_offset =
				stream->istream.v_offset + pos;
			return i_stream_raw_mbox_read(stream);
		}
		if (mbox_read_from_line(rstream) < 0) {
			stream->pos = 0;
			rstream->eof = TRUE;
			rstream->corrupted = TRUE;
			return -1;
		}

		/* got it. we don't want to return it however,
		   so start again from headers */
		buf = i_stream_get_data(rstream->input, &pos);
		if (pos == 0)
			return i_stream_raw_mbox_read(stream);
	}

	/* See if we have From-line here - note that it works right only
	   because all characters are different in mbox_from. */
        fromp = mbox_from; from_start_pos = from_after_pos = (size_t)-1;
	eoh_char = rstream->body_offset == (uoff_t)-1 ? '\n' : -1;
	for (i = stream->pos; i < pos; i++) {
		if (buf[i] == eoh_char &&
		    ((i > 0 && buf[i-1] == '\n') ||
                     (i > 1 && buf[i-1] == '\r' && buf[i-2] == '\n') ||
		     stream->istream.v_offset + i == rstream->hdr_offset)) {
			rstream->body_offset = stream->istream.v_offset + i + 1;
			eoh_char = -1;
		}
		if (buf[i] == *fromp) {
			if (*++fromp == '\0') {
				/* potential From-line, see if we have the
				   rest of the line buffered.
				   FIXME: if From-line is longer than input
				   buffer, we break. probably irrelevant.. */
				if (!rstream->one_mail_only) {
					i++;
					from_after_pos = i;
					from_start_pos = i - 6;
					if (from_start_pos > 0 &&
					    buf[from_start_pos-1] == '\r') {
						/* CR also belongs to it. */
						crlf_ending = TRUE;
						from_start_pos--;
					} else {
						crlf_ending = FALSE;
					}
				}
				fromp = mbox_from;
			} else if (from_start_pos != (size_t)-1) {
				/* we have the whole From-line here now.
				   See if it's a valid one. */
				if (mbox_from_parse(buf + from_after_pos,
						    pos - from_after_pos,
						    &received_time,
						    &sender) == 0) {
					/* yep, we stop here. */
					rstream->next_received_time =
						received_time;
					i_free(rstream->next_sender);
					rstream->next_sender = sender;
					stream->istream.eof = TRUE;

					rstream->crlf_ending = crlf_ending;
					handle_end_of_mail(rstream,
							   from_start_pos);
					break;
				}
				from_start_pos = (size_t)-1;
			}
		} else {
			fromp = mbox_from;
			if (buf[i] == *fromp)
				fromp++;
		}
	}

	/* we want to go at least one byte further next time */
	rstream->input_peak_offset = stream->istream.v_offset + i;

	if (from_start_pos != (size_t)-1) {
		/* we're waiting for the \n at the end of From-line */
		new_pos = from_start_pos;
	} else {
		/* leave out the beginnings of potential From-line + CR */
		new_pos = i - (fromp - mbox_from) - 1;
	}

	stream->buffer = buf;
	if (new_pos == stream->pos) {
		if (stream->istream.eof || ret > 0)
			return i_stream_raw_mbox_read(stream);
		i_assert(new_pos > 0);
		ret = -2;
	} else {
		i_assert(new_pos > stream->pos);
		ret = new_pos - stream->pos;
		stream->pos = new_pos;
	}
	return ret;
}

static void i_stream_raw_mbox_seek(struct istream_private *stream,
				   uoff_t v_offset, bool mark ATTR_UNUSED)
{
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;

	stream->istream.v_offset = v_offset;
	stream->skip = stream->pos = 0;
	stream->buffer = NULL;

        rstream->input_peak_offset = 0;
	rstream->eof = FALSE;
}

static void i_stream_raw_mbox_sync(struct istream_private *stream)
{
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;

	i_stream_sync(rstream->input);

	rstream->istream.skip = 0;
	rstream->istream.pos = 0;
}

static const struct stat *
i_stream_raw_mbox_stat(struct istream_private *stream, bool exact)
{
	struct raw_mbox_istream *rstream = (struct raw_mbox_istream *)stream;
	const struct stat *st;

	st = i_stream_stat(rstream->input, exact);
	if (st == NULL)
		return NULL;

	stream->statbuf = *st;
	stream->statbuf.st_size = -1;
	return &stream->statbuf;
}

struct istream *i_stream_create_raw_mbox(struct istream *input,
					 bool kludge_one_mail_only)
{
	struct raw_mbox_istream *rstream;

	i_stream_ref(input);

	rstream = i_new(struct raw_mbox_istream, 1);

	rstream->one_mail_only = kludge_one_mail_only;
	rstream->input = input;
	rstream->body_offset = (uoff_t)-1;
	rstream->mail_size = (uoff_t)-1;
	rstream->received_time = (time_t)-1;
	rstream->next_received_time = (time_t)-1;

	rstream->istream.iostream.destroy = i_stream_raw_mbox_destroy;
	rstream->istream.iostream.set_max_buffer_size =
		i_stream_raw_mbox_set_max_buffer_size;

	rstream->istream.read = i_stream_raw_mbox_read;
	rstream->istream.seek = i_stream_raw_mbox_seek;
	rstream->istream.sync = i_stream_raw_mbox_sync;
	rstream->istream.stat = i_stream_raw_mbox_stat;

	rstream->istream.istream.blocking = input->blocking;
	rstream->istream.istream.seekable = input->seekable;
	return i_stream_create(&rstream->istream, -1,
			       input->real_stream->abs_start_offset);
}

static int istream_raw_mbox_is_valid_from(struct raw_mbox_istream *rstream)
{
	const unsigned char *data;
	size_t size;
	time_t received_time;
	char *sender;

	/* minimal: "From x Thu Nov 29 22:33:52 2001" = 31 chars */
	if (i_stream_read_data(rstream->input, &data, &size, 30) == -1)
		return -1;

	if ((size == 1 && data[0] == '\n') ||
	    (size == 2 && data[0] == '\r' && data[1] == '\n')) {
		/* EOF */
		return 1;
	}

	if (size > 31 && memcmp(data, "\nFrom ", 6) == 0) {
		data += 6;
		size -= 6;
	} else if (size > 32 && memcmp(data, "\r\nFrom ", 7) == 0) {
		data += 7;
		size -= 7;
	} else {
		return 0;
	}

	while (memchr(data, '\n', size) == NULL) {
		if (i_stream_read_data(rstream->input, &data, &size, size) < 0)
			break;
	}

	if (mbox_from_parse(data, size, &received_time, &sender) < 0)
		return 0;

	rstream->next_received_time = received_time;
	i_free(rstream->next_sender);
	rstream->next_sender = sender;
	return 1;
}

uoff_t istream_raw_mbox_get_start_offset(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	return rstream->from_offset;
}

uoff_t istream_raw_mbox_get_header_offset(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	if (rstream->hdr_offset == rstream->from_offset)
		(void)i_stream_raw_mbox_read(&rstream->istream);

	if (rstream->corrupted) {
		i_error("Unexpectedly lost From-line at "
			"%"PRIuUOFF_T, rstream->from_offset);
		return (uoff_t)-1;
	}

	return rstream->hdr_offset;
}

uoff_t istream_raw_mbox_get_body_offset(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;
	uoff_t offset;
	size_t pos;

	if (rstream->body_offset != (uoff_t)-1)
		return rstream->body_offset;

	offset = stream->v_offset;
	i_stream_seek(stream, rstream->hdr_offset);
	while (rstream->body_offset == (uoff_t)-1) {
		i_stream_get_data(stream, &pos);
		i_stream_skip(stream, pos);

		if (i_stream_raw_mbox_read(&rstream->istream) < 0) {
			if (rstream->corrupted) {
				i_error("Unexpectedly lost From-line at "
					"%"PRIuUOFF_T, rstream->from_offset);
			} else {
				i_assert(rstream->body_offset != (uoff_t)-1);
			}
			break;
		}
	}

	i_stream_seek(stream, offset);
	return rstream->body_offset;
}

uoff_t istream_raw_mbox_get_body_size(struct istream *stream, uoff_t body_size)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;
	const unsigned char *data;
	size_t size;

	i_assert(rstream->hdr_offset != (uoff_t)-1);
	i_assert(rstream->body_offset != (uoff_t)-1);

	if (rstream->mail_size != (uoff_t)-1) {
		return rstream->mail_size -
			(rstream->body_offset - rstream->hdr_offset);
	}

	if (body_size != (uoff_t)-1) {
		i_stream_seek(rstream->input, rstream->body_offset + body_size);
		if (istream_raw_mbox_is_valid_from(rstream) > 0) {
			rstream->mail_size = body_size +
				(rstream->body_offset - rstream->hdr_offset);
			return body_size;
		}
	}

	/* have to read through the message body */
	while (i_stream_read_data(stream, &data, &size, 0) > 0)
		i_stream_skip(stream, size);

	i_assert(rstream->mail_size != (uoff_t)-1);
	return rstream->mail_size -
		(rstream->body_offset - rstream->hdr_offset);
}

time_t istream_raw_mbox_get_received_time(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	if (rstream->received_time == (time_t)-1)
		(void)i_stream_raw_mbox_read(&rstream->istream);
	return rstream->received_time;
}

const char *istream_raw_mbox_get_sender(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	if (rstream->sender == NULL)
		(void)i_stream_raw_mbox_read(&rstream->istream);
	return rstream->sender == NULL ? "" : rstream->sender;
}

bool istream_raw_mbox_has_crlf_ending(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	return rstream->crlf_ending;
}

void istream_raw_mbox_next(struct istream *stream, uoff_t body_size)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	body_size = istream_raw_mbox_get_body_size(stream, body_size);
	rstream->mail_size = (uoff_t)-1;

	rstream->received_time = rstream->next_received_time;
	rstream->next_received_time = (time_t)-1;

	i_free(rstream->sender);
	rstream->sender = rstream->next_sender;
	rstream->next_sender = NULL;

	rstream->from_offset = rstream->body_offset + body_size;
	rstream->hdr_offset = rstream->from_offset;
	rstream->body_offset = (uoff_t)-1;

	if (stream->v_offset != rstream->from_offset)
		i_stream_seek_mark(stream, rstream->from_offset);
	i_stream_seek_mark(rstream->input, rstream->from_offset);

	rstream->eof = FALSE;
	rstream->istream.istream.eof = FALSE;
}

int istream_raw_mbox_seek(struct istream *stream, uoff_t offset)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;
	bool check;

	rstream->corrupted = FALSE;
	rstream->eof = FALSE;
	rstream->istream.istream.eof = FALSE;

	if (rstream->mail_size != (uoff_t)-1 &&
	    rstream->hdr_offset + rstream->mail_size == offset) {
		istream_raw_mbox_next(stream, (uoff_t)-1);
		return 0;
	}

	if (offset == rstream->from_offset) {
		/* back to beginning of current message */
		offset = rstream->hdr_offset;
		check = offset == 0;
	} else {
		rstream->body_offset = (uoff_t)-1;
		rstream->mail_size = (uoff_t)-1;
		rstream->received_time = (time_t)-1;
		rstream->next_received_time = (time_t)-1;

		i_free(rstream->sender);
		rstream->sender = NULL;
		i_free(rstream->next_sender);
		rstream->next_sender = NULL;

                rstream->from_offset = offset;
		rstream->hdr_offset = offset;
		check = TRUE;
	}

	i_stream_seek_mark(stream, offset);
	i_stream_seek_mark(rstream->input, offset);

	if (check)
		(void)i_stream_raw_mbox_read(&rstream->istream);
	return rstream->corrupted ? -1 : 0;
}

bool istream_raw_mbox_is_eof(struct istream *stream)
{
	struct raw_mbox_istream *rstream =
		(struct raw_mbox_istream *)stream->real_stream;

	return rstream->eof;
}