changeset 2429:92f92b3c447b HEAD

istream-header-filter now parses the headers incrementally while read()ing, instead of doing it at initialization and storing into one large buffer.
author Timo Sirainen <tss@iki.fi>
date Sun, 22 Aug 2004 05:58:39 +0300
parents abef2ac8843a
children 7c1dc4a7db3a
files src/imap/imap-fetch-body.c src/lib-mail/istream-header-filter.c src/lib-mail/istream-header-filter.h src/lib-storage/index/index-mail-headers.c src/lib-storage/index/mbox/mbox-mail.c
diffstat 5 files changed, 162 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/src/imap/imap-fetch-body.c	Thu Aug 19 06:56:01 2004 +0300
+++ b/src/imap/imap-fetch-body.c	Sun Aug 22 05:58:39 2004 +0300
@@ -333,28 +333,25 @@
 	const char *const *fields;
 	struct message_size msg_size;
 	struct istream *input;
-	size_t size, fields_count;
+	size_t fields_count;
 
 	/* MIME, HEADER.FIELDS (list), HEADER.FIELDS.NOT (list) */
 
 	if (strncmp(header_section, "HEADER.FIELDS ", 14) == 0) {
 		fields = imap_fetch_get_body_fields(header_section + 14,
 						    &fields_count);
-		input = i_stream_create_header_filter(ctx->client->cmd_pool,
-						      ctx->cur_input, FALSE,
+		input = i_stream_create_header_filter(ctx->cur_input, FALSE,
 						      fields, fields_count,
 						      NULL, NULL);
 	} else if (strncmp(header_section, "HEADER.FIELDS.NOT ", 18) == 0) {
 		fields = imap_fetch_get_body_fields(header_section + 18,
 						    &fields_count);
-		input = i_stream_create_header_filter(ctx->client->cmd_pool,
-						      ctx->cur_input, TRUE,
+		input = i_stream_create_header_filter(ctx->cur_input, TRUE,
 						      fields, fields_count,
 						      NULL, NULL);
 	} else if (strcmp(header_section, "MIME") == 0) {
 		/* Mime-Version + Content-* fields */
-		input = i_stream_create_header_filter(ctx->client->cmd_pool,
-						      ctx->cur_input, FALSE,
+		input = i_stream_create_header_filter(ctx->cur_input, FALSE,
 						      NULL, 0,
 						      header_filter_mime, NULL);
 	} else {
@@ -374,11 +371,9 @@
 	   Also, Netscape 4.x seems to require this or it won't show the
 	   mail.. So if we do make this as RFC says, we'll need to add
 	   netscape-workaround. */
+	message_get_header_size(ctx->cur_input, &msg_size, NULL);
+	i_stream_seek(ctx->cur_input, 0);
 
-	// FIXME: we rely on the current behavior of header filter..
-	(void)i_stream_get_data(ctx->cur_input, &size);
-	memset(&msg_size, 0, sizeof(msg_size));
-	msg_size.physical_size = msg_size.virtual_size = size;
 	return fetch_data(ctx, body, &msg_size);
 }
 
--- a/src/lib-mail/istream-header-filter.c	Thu Aug 19 06:56:01 2004 +0300
+++ b/src/lib-mail/istream-header-filter.c	Sun Aug 22 05:58:39 2004 +0300
@@ -1,8 +1,5 @@
 /* Copyright (C) 2003-2004 Timo Sirainen */
 
-/* FIXME: the header wouldn't necessarily have to be read in memory. we could
-   just parse it forward in _read(). */
-
 #include "lib.h"
 #include "buffer.h"
 #include "message-parser.h"
@@ -13,11 +10,25 @@
 
 struct header_filter_istream {
 	struct _istream istream;
+	pool_t pool; /* owns this struct, the header name copies and hdr_buf */
 
 	struct istream *input;
+	struct message_header_parser_ctx *hdr_ctx; /* NULL when no parse is in progress */
 
-	buffer_t *headers;
+	const char **headers; /* names to match; looked up with bsearch() */
+	size_t headers_count;
+
+	header_filter_callback *callback; /* may be NULL */
+	void *context; /* passed through to callback */
+
+	buffer_t *hdr_buf; /* filtered header data handed out via istream.buffer */
 	struct message_size header_size;
+	uoff_t skip_count; /* filtered bytes still to discard after a seek into the headers */
+
+	unsigned int cur_line, parsed_lines; /* lines parsed in this pass / lines already given to callback */
+
+	unsigned int header_read:1; /* parser has reached the end of the headers */
+	unsigned int filter:1; /* 1 = drop matching headers, 0 = drop non-matching ones */
 };
 
 static void _close(struct _iostream *stream __attr_unused__)
@@ -29,8 +40,10 @@
 	struct header_filter_istream *mstream =
 		(struct header_filter_istream *)stream;
 
+	if (mstream->hdr_ctx != NULL)
+		message_parse_header_deinit(mstream->hdr_ctx);
 	i_stream_unref(mstream->input);
-	buffer_free(mstream->headers);
+	pool_unref(mstream->pool);
 }
 
 static void _set_max_buffer_size(struct _iostream *stream, size_t max_size)
@@ -41,6 +54,107 @@
 	i_stream_set_max_buffer_size(mstream->input, max_size);
 }
 
+static ssize_t read_header(struct header_filter_istream *mstream)
+{ /* Appends filtered header lines to hdr_buf; returns the number of newly available bytes, or -2 when no header data is left. */
+	struct message_header_line *hdr;
+	size_t pos;
+	ssize_t ret;
+	int matched;
+
+	if (mstream->header_read &&
+	    mstream->istream.istream.v_offset + mstream->istream.pos ==
+	    mstream->header_size.virtual_size) {
+		/* we don't support mixing headers and body.
+		   it shouldn't be needed. */
+		return -2;
+	}
+
+	if (mstream->hdr_ctx == NULL) {
+		mstream->hdr_ctx =
+			message_parse_header_init(mstream->input, NULL, FALSE);
+	}
+
+	buffer_copy(mstream->hdr_buf, 0,
+		    mstream->hdr_buf, mstream->istream.skip, (size_t)-1); /* move the unread data to the start of hdr_buf */
+
+        mstream->istream.pos -= mstream->istream.skip;
+	mstream->istream.skip = 0;
+
+	buffer_set_used_size(mstream->hdr_buf, mstream->istream.pos);
+
+	while ((hdr = message_parse_header_next(mstream->hdr_ctx)) != NULL) {
+		mstream->cur_line++;
+
+		if (hdr->eoh) {
+			buffer_append(mstream->hdr_buf, "\r\n", 2);
+			break;
+		}
+
+		matched = bsearch(hdr->name, mstream->headers,
+				  mstream->headers_count,
+				  sizeof(*mstream->headers),
+				  bsearch_strcasecmp) != NULL; /* binary search: the headers list must be sorted */
+		if (mstream->cur_line > mstream->parsed_lines &&
+		    mstream->callback != NULL) { /* lines re-parsed after a seek are not reported again */
+                        mstream->parsed_lines = mstream->cur_line;
+			mstream->callback(hdr, &matched, mstream->context);
+		}
+
+		if (matched == mstream->filter) {
+			/* ignore: this line is excluded by the filter */
+		} else {
+			if (!hdr->continued) { /* name and separator are written only for the first line of a header */
+				buffer_append(mstream->hdr_buf,
+					      hdr->name, hdr->name_len);
+				buffer_append(mstream->hdr_buf,
+					      hdr->middle, hdr->middle_len);
+			}
+			buffer_append(mstream->hdr_buf,
+				      hdr->value, hdr->value_len);
+			buffer_append(mstream->hdr_buf, "\r\n", 2);
+
+			if (mstream->skip_count >= mstream->hdr_buf->used) {
+				/* we need more: everything buffered is still before the seek position */
+				mstream->skip_count -= mstream->hdr_buf->used;
+				buffer_set_used_size(mstream->hdr_buf, 0);
+			} else {
+				if (mstream->skip_count > 0) {
+					mstream->istream.skip =
+						mstream->skip_count;
+					mstream->skip_count = 0;
+				}
+				break;
+			}
+		}
+	}
+
+	mstream->istream.buffer = buffer_get_data(mstream->hdr_buf, &pos);
+	ret = (ssize_t)(pos - mstream->istream.pos - mstream->istream.skip);
+	mstream->istream.pos = pos;
+
+	if (hdr == NULL) {
+		/* finished: the parser returned no more lines */
+		mstream->header_read = TRUE;
+
+		message_parse_header_deinit(mstream->hdr_ctx);
+		mstream->hdr_ctx = NULL;
+
+		mstream->header_size.physical_size = mstream->input->v_offset; /* header length in the input stream */
+		mstream->header_size.virtual_size =
+			mstream->istream.istream.v_offset + pos; /* header length in the filtered output */
+	}
+
+	if (ret == 0) {
+		i_assert(hdr == NULL);
+		i_assert(mstream->istream.istream.v_offset +
+			 mstream->istream.pos ==
+			 mstream->header_size.virtual_size);
+		return -2;
+	}
+
+	return ret;
+}
+
 static ssize_t _read(struct _istream *stream)
 {
 	struct header_filter_istream *mstream =
@@ -48,10 +162,11 @@
 	ssize_t ret;
 	size_t pos;
 
-	if (stream->istream.v_offset < mstream->header_size.virtual_size) {
-		/* we don't support mixing headers and body.
-		   it shouldn't be needed. */
-		return -2;
+	if (!mstream->header_read ||
+	    stream->istream.v_offset < mstream->header_size.virtual_size) {
+		ret = read_header(mstream);
+		if (ret != -2 || stream->pos != stream->skip)
+			return ret;
 	}
 
 	if (mstream->input->v_offset - mstream->header_size.physical_size !=
@@ -71,7 +186,7 @@
 		stream->buffer = i_stream_get_data(mstream->input, &pos);
 	}
 
-	stream->pos -= mstream->istream.skip;
+	stream->pos -= stream->skip;
 	stream->skip = 0;
 
 	ret = pos <= stream->pos ? -1 :
@@ -86,94 +201,53 @@
 		(struct header_filter_istream *)stream;
 
 	stream->istream.v_offset = v_offset;
+	stream->skip = stream->pos = 0;
+	stream->buffer = NULL;
+
+	if (mstream->hdr_ctx != NULL) {
+		message_parse_header_deinit(mstream->hdr_ctx);
+		mstream->hdr_ctx = NULL;
+	}
+
 	if (v_offset < mstream->header_size.virtual_size) {
-		/* still in headers */
-		stream->skip = v_offset;
-		stream->pos = mstream->header_size.virtual_size;
-		stream->buffer = buffer_get_data(mstream->headers, NULL);
+		/* seek into headers. we'll have to re-parse them, use
+		   skip_count to set the wanted position */
+		i_stream_seek(mstream->input, 0);
+		mstream->skip_count = v_offset;
+		mstream->cur_line = 0;
 	} else {
-		/* body - use our real input stream */
-		stream->skip = stream->pos = 0;
-		stream->buffer = NULL;
-
+		/* body */
 		v_offset += mstream->header_size.physical_size -
 			mstream->header_size.virtual_size;
 		i_stream_seek(mstream->input, v_offset);
 	}
 }
 
-static void
-read_and_hide_headers(struct istream *input, int filter,
-		      const char *const *headers, size_t headers_count,
-		      buffer_t *dest, struct message_size *hdr_size,
-		      header_filter_callback *callback, void *context)
-{
-	struct message_header_parser_ctx *hdr_ctx;
-	struct message_header_line *hdr;
-	uoff_t virtual_size = 0;
-	int matched;
-
-	hdr_ctx = message_parse_header_init(input, hdr_size, FALSE);
-	while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) {
-		if (hdr->eoh) {
-			if (dest != NULL)
-				buffer_append(dest, "\r\n", 2);
-			else
-				virtual_size += 2;
-			break;
-		}
-
-		matched = bsearch(hdr->name, headers, headers_count,
-				  sizeof(*headers), bsearch_strcasecmp) != NULL;
-		if (callback != NULL)
-			callback(hdr, &matched, context);
-
-		if (matched == filter) {
-			/* ignore */
-		} else if (dest != NULL) {
-			if (!hdr->continued) {
-				buffer_append(dest, hdr->name, hdr->name_len);
-				buffer_append(dest, hdr->middle,
-					      hdr->middle_len);
-			}
-			buffer_append(dest, hdr->value, hdr->value_len);
-			buffer_append(dest, "\r\n", 2);
-		} else {
-			if (!hdr->continued)
-				virtual_size += hdr->name_len + 2;
-			virtual_size += hdr->value_len + 2;
-		}
-	}
-	message_parse_header_deinit(hdr_ctx);
-
-	if (dest != NULL)
-		virtual_size = buffer_get_used_size(dest);
-
-	hdr_size->virtual_size = virtual_size;
-	hdr_size->lines = 0;
-}
-
 struct istream *
-i_stream_create_header_filter(pool_t pool, struct istream *input, int filter,
+i_stream_create_header_filter(struct istream *input, int filter,
 			      const char *const *headers, size_t headers_count,
 			      header_filter_callback *callback, void *context)
 {
 	struct header_filter_istream *mstream;
+	pool_t pool;
+	size_t i;
 
+	pool = pool_alloconly_create("header filter stream", 1024);
 	mstream = p_new(pool, struct header_filter_istream, 1);
+	mstream->pool = pool;
+
 	mstream->input = input;
 	i_stream_ref(mstream->input);
 
-	mstream->headers = buffer_create_dynamic(default_pool,
-						 8192, (size_t)-1);
-	read_and_hide_headers(input, filter, headers, headers_count,
-			      mstream->headers, &mstream->header_size,
-			      callback, context);
-	if (callback != NULL)
-		callback(NULL, FALSE, context);
+	mstream->headers = p_new(pool, const char *, headers_count);
+	for (i = 0; i < headers_count; i++) 
+		mstream->headers[i] = p_strdup(pool, headers[i]);
+	mstream->headers_count = headers_count;
+	mstream->hdr_buf = buffer_create_dynamic(pool, 512, (size_t)-1);
 
-	mstream->istream.buffer = buffer_get_data(mstream->headers, NULL);
-	mstream->istream.pos = mstream->header_size.virtual_size;
+	mstream->callback = callback;
+	mstream->context = context;
+	mstream->filter = filter;
 
 	mstream->istream.iostream.close = _close;
 	mstream->istream.iostream.destroy = _destroy;
--- a/src/lib-mail/istream-header-filter.h	Thu Aug 19 06:56:01 2004 +0300
+++ b/src/lib-mail/istream-header-filter.h	Sun Aug 22 05:58:39 2004 +0300
@@ -9,7 +9,7 @@
 /* NOTE: headers list must be sorted. If filter is TRUE, given headers are
    removed from output, otherwise only given headers are included in output. */
 struct istream *
-i_stream_create_header_filter(pool_t pool, struct istream *input, int filter,
+i_stream_create_header_filter(struct istream *input, int filter,
 			      const char *const *headers, size_t headers_count,
 			      header_filter_callback *callback, void *context);
 
--- a/src/lib-storage/index/index-mail-headers.c	Thu Aug 19 06:56:01 2004 +0300
+++ b/src/lib-storage/index/index-mail-headers.c	Sun Aug 22 05:58:39 2004 +0300
@@ -540,8 +540,7 @@
 
 	index_mail_parse_header_init(mail, _headers);
 	mail->data.filter_stream =
-		i_stream_create_header_filter(mail->pool, mail->data.stream,
-					      FALSE,
+		i_stream_create_header_filter(mail->data.stream, FALSE,
 					      headers->name, headers->count,
 					      header_cache_callback, mail);
 	return mail->data.filter_stream;
--- a/src/lib-storage/index/mbox/mbox-mail.c	Thu Aug 19 06:56:01 2004 +0300
+++ b/src/lib-storage/index/mbox/mbox-mail.c	Sun Aug 22 05:58:39 2004 +0300
@@ -129,8 +129,7 @@
 		raw_stream = i_stream_create_limit(default_pool, raw_stream,
 						   offset, (uoff_t)-1);
 		data->stream =
-			i_stream_create_header_filter(default_pool,
-						      raw_stream, TRUE,
+			i_stream_create_header_filter(raw_stream, TRUE,
 						      mbox_hide_headers,
 						      mbox_hide_headers_count,
 						      NULL, NULL);