changeset 1697:ef79ce6507ff HEAD

Message parsing can now be done in two parts - header and body. We're now more eager about parsing the MIME structure of a message: it's done whenever the body has to be fully read (e.g. for rfc822.size).
author Timo Sirainen <tss@iki.fi>
date Thu, 21 Aug 2003 03:04:11 +0300
parents ad5be4c9cf09
children d77a282125b6
files src/lib-mail/message-parser.c src/lib-mail/message-parser.h src/lib-storage/index/index-mail-headers.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-mail.h
diffstat 5 files changed, 261 insertions(+), 171 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/message-parser.c	Thu Aug 21 02:26:37 2003 +0300
+++ b/src/lib-mail/message-parser.c	Thu Aug 21 03:04:11 2003 +0300
@@ -17,15 +17,17 @@
 	size_t len;
 };
 
-struct parser_context {
-	pool_t pool;
-	struct message_part *part;
+struct message_parser_ctx {
+	pool_t parser_pool, part_pool;
+	struct istream *input;
+	struct message_part *parts, *part;
 
 	char *last_boundary;
 	char *last_content_type;
 	struct message_boundary *boundaries;
 
 	message_header_callback_t *callback;
+	message_body_callback_t *body_callback;
 	void *context;
 };
 
@@ -42,16 +44,19 @@
 	int has_nuls;
 };
 
+static void
+message_parse_part_header(struct message_parser_ctx *parser_ctx);
+
 static struct message_part *
-message_parse_part(struct istream *input,
-		   struct parser_context *parser_ctx);
+message_parse_part_body(struct message_parser_ctx *parser_ctx);
 
 static struct message_part *
-message_parse_body(struct istream *input, struct message_boundary *boundaries,
-		   struct message_size *body_size, int *has_nuls);
+message_parse_body(struct message_parser_ctx *parser_ctx,
+		   struct message_boundary *boundaries,
+		   struct message_size *msg_size, int *has_nuls);
 
 static struct message_part *
-message_skip_boundary(struct istream *input,
+message_skip_boundary(struct message_parser_ctx *parser_ctx,
 		      struct message_boundary *boundaries,
 		      struct message_size *boundary_size, int *has_nuls);
 
@@ -92,14 +97,14 @@
 static void parse_content_type(const unsigned char *value, size_t value_len,
 			       void *context)
 {
-	struct parser_context *parser_ctx = context;
+	struct message_parser_ctx *parser_ctx = context;
 	const char *str;
 
 	if (parser_ctx->last_content_type != NULL || value_len == 0)
 		return;
 
 	str = parser_ctx->last_content_type =
-		p_strndup(parser_ctx->pool, value, value_len);
+		p_strndup(parser_ctx->parser_pool, value, value_len);
 
 	if (strcasecmp(str, "message/rfc822") == 0)
 		parser_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
@@ -121,7 +126,7 @@
 			 const unsigned char *value, size_t value_len,
 			 int value_quoted, void *context)
 {
-	struct parser_context *parser_ctx = context;
+	struct message_parser_ctx *parser_ctx = context;
 
 	if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
 	    name_len != 8 || memcasecmp(name, "boundary", 8) != 0)
@@ -129,22 +134,21 @@
 
 	if (parser_ctx->last_boundary == NULL) {
 		parser_ctx->last_boundary =
-			p_strndup(parser_ctx->pool, value, value_len);
+			p_strndup(parser_ctx->parser_pool, value, value_len);
 		if (value_quoted)
 			str_unescape(parser_ctx->last_boundary);
 	}
 }
 
 static struct message_part *
-message_parse_multipart(struct istream *input,
-			struct parser_context *parser_ctx)
+message_parse_multipart(struct message_parser_ctx *parser_ctx)
 {
 	struct message_part *parent_part, *next_part, *part;
 	struct message_boundary *b;
 	int has_nuls;
 
 	/* multipart message. add new boundary */
-	b = t_new(struct message_boundary, 1);
+	b = p_new(parser_ctx->parser_pool, struct message_boundary, 1);
 	b->part = parser_ctx->part;
 	b->boundary = parser_ctx->last_boundary;
 	b->len = strlen(b->boundary);
@@ -158,7 +162,7 @@
 
 	/* skip the data before the first boundary */
 	parent_part = parser_ctx->part;
-	next_part = message_skip_boundary(input, parser_ctx->boundaries,
+	next_part = message_skip_boundary(parser_ctx, parser_ctx->boundaries,
 					  &parent_part->body_size, &has_nuls);
 	if (has_nuls)
 		parent_part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
@@ -166,12 +170,13 @@
 	/* now, parse the parts */
 	while (next_part == parent_part) {
 		/* new child */
-		part = message_part_append(parser_ctx->pool, parent_part);
+		part = message_part_append(parser_ctx->part_pool, parent_part);
 		if ((parent_part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
 			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
 
-                parser_ctx->part = part;
-		next_part = message_parse_part(input, parser_ctx);
+		parser_ctx->part = part;
+                message_parse_part_header(parser_ctx);
+		next_part = message_parse_part_body(parser_ctx);
 
 		if ((part->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
 			/* it also belongs to parent */
@@ -185,7 +190,8 @@
 			break;
 
 		/* skip the boundary */
-		next_part = message_skip_boundary(input, parser_ctx->boundaries,
+		next_part = message_skip_boundary(parser_ctx,
+						  parser_ctx->boundaries,
 						  &parent_part->body_size,
 						  &has_nuls);
 		if (has_nuls)
@@ -201,27 +207,22 @@
 #define MUTEX_FLAGS \
 	(MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
 
-static struct message_part *
-message_parse_part(struct istream *input, struct parser_context *parser_ctx)
+static void message_parse_part_header(struct message_parser_ctx *parser_ctx)
 {
+	struct message_part *part = parser_ctx->part;
 	struct message_header_parser_ctx *hdr_ctx;
 	struct message_header_line *hdr;
-	struct message_part *next_part, *part;
-	uoff_t hdr_size;
-	int has_nuls;
 
-	hdr_ctx = message_parse_header_init(input,
-					    &parser_ctx->part->header_size);
+	hdr_ctx = message_parse_header_init(parser_ctx->input,
+					    &part->header_size);
 	while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) {
 		/* call the user-defined header parser */
-		if (parser_ctx->callback != NULL) {
-			parser_ctx->callback(parser_ctx->part, hdr,
-					     parser_ctx->context);
-		}
+		if (parser_ctx->callback != NULL)
+			parser_ctx->callback(part, hdr, parser_ctx->context);
 
 		if (!hdr->eoh && strcasecmp(hdr->name, "Mime-Version") == 0) {
 			/* it's MIME. Content-* headers are valid */
-			parser_ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
+			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
 		}
 
 		if (!hdr->eoh && strcasecmp(hdr->name, "Content-Type") == 0) {
@@ -238,63 +239,66 @@
 		}
 	}
 
-	if ((parser_ctx->part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+	if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
 		/* It's not MIME. Reset everything we found from
 		   Content-Type. */
-		parser_ctx->part->flags = 0;
+		part->flags = 0;
                 parser_ctx->last_boundary = NULL;
 		parser_ctx->last_content_type = NULL;
 	}
-	if (parser_ctx->callback != NULL) {
-		parser_ctx->callback(parser_ctx->part, NULL,
-				     parser_ctx->context);
-	}
+	if (parser_ctx->callback != NULL)
+		parser_ctx->callback(part, NULL, parser_ctx->context);
 	if (hdr_ctx->has_nuls)
-		parser_ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
+		part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
 	message_parse_header_deinit(hdr_ctx);
 
-	i_assert((parser_ctx->part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+	i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
+}
 
-	/* update message position/size */
-	hdr_size = parser_ctx->part->header_size.physical_size;
+static struct message_part *
+message_parse_part_body(struct message_parser_ctx *parser_ctx)
+{
+	struct message_part *part = parser_ctx->part;
+        struct message_part *next_part;
+	int has_nuls;
 
 	if (parser_ctx->last_boundary != NULL)
-		return message_parse_multipart(input, parser_ctx);
+		return message_parse_multipart(parser_ctx);
 
 	if (parser_ctx->last_content_type == NULL) {
-		if (parser_ctx->part->parent != NULL &&
-		    (parser_ctx->part->parent->flags &
+		if (part->parent != NULL &&
+		    (part->parent->flags &
 		     MESSAGE_PART_FLAG_MULTIPART_DIGEST)) {
 			/* when there's no content-type specified and we're
 			   below multipart/digest, the assume message/rfc822
 			   content-type */
-			parser_ctx->part->flags |=
-				MESSAGE_PART_FLAG_MESSAGE_RFC822;
+			part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
 		} else {
 			/* otherwise we default to text/plain */
-			parser_ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
+			part->flags |= MESSAGE_PART_FLAG_TEXT;
 		}
 	}
 
 	parser_ctx->last_boundary = NULL;
         parser_ctx->last_content_type = NULL;
 
-	if (parser_ctx->part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
+	if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) {
 		/* message/rfc822 part - the message body begins with
 		   headers again, this works pretty much the same as
 		   a single multipart/mixed item */
-		part = message_part_append(parser_ctx->pool, parser_ctx->part);
+		part = message_part_append(parser_ctx->part_pool, part);
 
 		parser_ctx->part = part;
-		next_part = message_parse_part(input, parser_ctx);
+		message_parse_part_header(parser_ctx);
+		next_part = message_parse_part_body(parser_ctx);
 		parser_ctx->part = part->parent;
 
 		/* our body size is the size of header+body in message/rfc822 */
 		message_size_add_part(&part->parent->body_size, part);
 	} else {
 		/* normal message, read until the next boundary */
-		part = parser_ctx->part;
-		next_part = message_parse_body(input, parser_ctx->boundaries,
+		next_part = message_parse_body(parser_ctx,
+					       parser_ctx->boundaries,
 					       &part->body_size, &has_nuls);
 		if (has_nuls)
 			part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
@@ -495,18 +499,19 @@
 }
 
 static struct message_part *
-message_parse_body(struct istream *input, struct message_boundary *boundaries,
+message_parse_body(struct message_parser_ctx *parser_ctx,
+		   struct message_boundary *boundaries,
 		   struct message_size *msg_size, int *has_nuls)
 {
 	struct message_boundary *boundary;
 	struct message_size body_size;
 
 	if (boundaries == NULL) {
-		message_get_body_size(input, &body_size, has_nuls);
+		message_get_body_size(parser_ctx->input, &body_size, has_nuls);
 		message_size_add(msg_size, &body_size);
 		boundary = NULL;
 	} else {
-		boundary = message_find_boundary(input, boundaries,
+		boundary = message_find_boundary(parser_ctx->input, boundaries,
 						 msg_size, FALSE, has_nuls);
 	}
 
@@ -516,7 +521,7 @@
 /* skip data until next boundary is found. if it's end boundary,
    skip the footer as well. */
 static struct message_part *
-message_skip_boundary(struct istream *input,
+message_skip_boundary(struct message_parser_ctx *parser_ctx,
 		      struct message_boundary *boundaries,
 		      struct message_size *boundary_size, int *has_nuls)
 {
@@ -525,61 +530,90 @@
 	size_t size;
 	int end_boundary;
 
-	boundary = message_find_boundary(input, boundaries,
+	boundary = message_find_boundary(parser_ctx->input, boundaries,
 					 boundary_size, TRUE, has_nuls);
 	if (boundary == NULL)
 		return NULL;
 
 	/* now, see if it's end boundary */
 	end_boundary = FALSE;
-	if (i_stream_read_data(input, &msg, &size, 1) > 0)
+	if (i_stream_read_data(parser_ctx->input, &msg, &size, 1) > 0)
 		end_boundary = msg[0] == '-' && msg[1] == '-';
 
 	/* skip the rest of the line */
-	message_skip_line(input, boundary_size, !end_boundary, has_nuls);
+	message_skip_line(parser_ctx->input, boundary_size,
+			  !end_boundary, has_nuls);
 
 	if (end_boundary) {
 		/* skip the footer */
-		return message_parse_body(input, boundary->next,
+		return message_parse_body(parser_ctx, boundary->next,
 					  boundary_size, has_nuls);
 	}
 
 	return boundary == NULL ? NULL : boundary->part;
 }
 
-struct message_part *message_parse(pool_t pool, struct istream *input,
-				   message_header_callback_t *callback,
-				   void *context)
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input)
 {
-	struct message_part *part;
-	struct parser_context parser_ctx;
+	struct message_parser_ctx *ctx;
+	pool_t pool;
+
+	pool = pool_alloconly_create("Message Parser", 1024);
+	ctx = p_new(pool, struct message_parser_ctx, 1);
+	ctx->parser_pool = pool;
+	ctx->part_pool = part_pool;
+	ctx->input = input;
+	ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
+	return ctx;
+}
+
+struct message_part *message_parser_deinit(struct message_parser_ctx *ctx)
+{
+	struct message_part *parts = ctx->parts;
+
+	pool_unref(ctx->parser_pool);
+	return parts;
+}
 
-	memset(&parser_ctx, 0, sizeof(parser_ctx));
-	parser_ctx.pool = pool;
-	parser_ctx.callback = callback;
-	parser_ctx.context = context;
-	parser_ctx.part = part = p_new(pool, struct message_part, 1);
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_header_callback_t *callback,
+				 void *context)
+{
+	ctx->callback = callback;
+	ctx->context = context;
 
-	message_parse_part(input, &parser_ctx);
-	return part;
+	message_parse_part_header(ctx);
+        *hdr_size = ctx->part->header_size;
+}
+
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_header_callback_t *hdr_callback,
+			       message_body_callback_t *body_callback,
+			       void *context)
+{
+	ctx->callback = hdr_callback;
+	ctx->body_callback = body_callback;
+	ctx->context = context;
+
+	message_parse_part_body(ctx);
 }
 
 static void part_parse_headers(struct message_part *part, struct istream *input,
-			       uoff_t start_offset,
 			       message_header_callback_t *callback,
 			       void *context)
 {
 	while (part != NULL) {
 		/* note that we want to parse the header of all
 		   the message parts, multiparts too. */
-		i_assert(part->physical_pos >= input->v_offset - start_offset);
-		i_stream_skip(input, part->physical_pos -
-			      (input->v_offset - start_offset));
+		i_assert(part->physical_pos >= input->v_offset);
+		i_stream_skip(input, part->physical_pos - input->v_offset);
 
 		message_parse_header(part, input, NULL, callback, context);
 		if (part->children != NULL) {
 			part_parse_headers(part->children, input,
-					   start_offset, callback, context);
+					   callback, context);
 		}
 
 		part = part->next;
@@ -590,7 +624,7 @@
 			      message_header_callback_t *callback,
 			      void *context)
 {
-	part_parse_headers(part, input, input->v_offset, callback, context);
+	part_parse_headers(part, input, callback, context);
 }
 
 void message_parse_header(struct message_part *part, struct istream *input,
--- a/src/lib-mail/message-parser.h	Thu Aug 21 02:26:37 2003 +0300
+++ b/src/lib-mail/message-parser.h	Thu Aug 21 03:04:11 2003 +0300
@@ -37,6 +37,7 @@
 	void *context;
 };
 
+struct message_parser_ctx;
 struct message_header_parser_ctx;
 
 struct message_header_line {
@@ -56,15 +57,16 @@
 	unsigned int use_full_value:1; /* set if you want full_value */
 };
 
-/* called once with hdr = NULL at end of headers */
+/* called once with hdr = NULL at the end of headers */
 typedef void message_header_callback_t(struct message_part *part,
 				       struct message_header_line *hdr,
 				       void *context);
+/* called once with size = 0 at the end of the message part */
+typedef void message_body_callback_t(struct message_part *part,
+				     const unsigned char *data, size_t size,
+				     void *context);
 
 /* callback is called for each field in message header. */
-struct message_part *message_parse(pool_t pool, struct istream *input,
-				   message_header_callback_t *callback,
-				   void *context);
 void message_parse_from_parts(struct message_part *part, struct istream *input,
 			      message_header_callback_t *callback,
 			      void *context);
@@ -72,6 +74,26 @@
 			  struct message_size *hdr_size,
 			  message_header_callback_t *callback, void *context);
 
+
+/* Initialize message parser. part_pool specifies where struct message_parts
+   are allocated from. */
+struct message_parser_ctx *
+message_parser_init(pool_t part_pool, struct istream *input);
+struct message_part *message_parser_deinit(struct message_parser_ctx *ctx);
+
+/* Read and parse header. */
+void message_parser_parse_header(struct message_parser_ctx *ctx,
+				 struct message_size *hdr_size,
+				 message_header_callback_t *callback,
+				 void *context);
+/* Read and parse body. If message is a MIME multipart or message/rfc822
+   message, hdr_callback is called for all headers. body_callback is called
+   for the body content. */
+void message_parser_parse_body(struct message_parser_ctx *ctx,
+			       message_header_callback_t *hdr_callback,
+			       message_body_callback_t *body_callback,
+			       void *context);
+
 struct message_header_parser_ctx *
 message_parse_header_init(struct istream *input, struct message_size *hdr_size);
 void message_parse_header_deinit(struct message_header_parser_ctx *ctx);
--- a/src/lib-storage/index/index-mail-headers.c	Thu Aug 21 02:26:37 2003 +0300
+++ b/src/lib-storage/index/index-mail-headers.c	Thu Aug 21 03:04:11 2003 +0300
@@ -420,7 +420,7 @@
 	return TRUE;
 }
 
-int index_mail_parse_headers(struct index_mail *mail, int get_parts)
+int index_mail_parse_headers(struct index_mail *mail)
 {
 	struct mail_cache *cache = mail->ibox->index->cache;
 	struct index_mail_data *data = &mail->data;
@@ -491,21 +491,16 @@
 	data->bodystructure_header_parse = data->bodystructure_header_want;
 	index_mail_parse_header_init(mail, NULL);
 
-	if ((mail->wanted_fields & MAIL_FETCH_MESSAGE_PARTS) != 0)
-		get_parts = TRUE;
-	if (data->parts != NULL)
-		get_parts = FALSE;
-
-	if (!data->bodystructure_header_want && !get_parts) {
+	if (data->parts != NULL || data->parser_ctx != NULL) {
 		message_parse_header(data->parts, data->stream, &data->hdr_size,
 				     index_mail_parse_header, mail);
-	} else if (data->parts == NULL) {
-		data->parts = message_parse(mail->pool, data->stream,
+	} else {
+		data->parser_ctx =
+			message_parser_init(mail->pool, data->stream);
+		message_parser_parse_header(data->parser_ctx, &data->hdr_size,
 					    index_mail_parse_header, mail);
-	} else {
-		message_parse_from_parts(data->parts, data->stream,
-					 index_mail_parse_header, mail);
 	}
+	data->hdr_size_set = TRUE;
 
 	if (data->bodystructure_header_want) {
 		data->bodystructure_header_want = FALSE;
@@ -513,19 +508,7 @@
 		data->bodystructure_header_parsed = TRUE;
 	}
 
-	if (get_parts) {
-		/* we know the NULs now, update them */
-		if ((data->parts->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
-			mail->mail.has_nuls = TRUE;
-			mail->mail.has_no_nuls = FALSE;
-		} else {
-			mail->mail.has_nuls = FALSE;
-			mail->mail.has_no_nuls = TRUE;
-		}
-	}
-
 	data->parse_header = FALSE;
-	data->hdr_size_set = TRUE;
 	data->header_fully_parsed = TRUE;
 
 	return TRUE;
@@ -554,7 +537,8 @@
 		}
 
 		if (idx < 0) {
-			index_mail_parse_headers(mail, FALSE);
+			if (!index_mail_parse_headers(mail))
+				return NULL;
 
 			/* might have been moved in memory, get it again */
 			hdr = cached_header_find(mail, field, NULL);
@@ -606,8 +590,10 @@
 				all_saved = FALSE;
 		}
 
-		if (!all_saved)
-			index_mail_parse_headers(mail, FALSE);
+		if (!all_saved) {
+			if (!index_mail_parse_headers(mail))
+				return NULL;
+		}
 	}
 
 	return i_stream_create_from_data(mail->pool,
--- a/src/lib-storage/index/index-mail.c	Thu Aug 21 02:26:37 2003 +0300
+++ b/src/lib-storage/index/index-mail.c	Thu Aug 21 03:04:11 2003 +0300
@@ -14,7 +14,7 @@
 #include "index-expunge.h"
 #include "index-mail.h"
 
-#include <ctype.h>
+static int index_mail_parse_body(struct index_mail *mail);
 
 static struct message_part *get_cached_parts(struct index_mail *mail)
 {
@@ -242,7 +242,11 @@
 			return data->parts;
 	}
 
-	if (!index_mail_parse_headers(mail, TRUE))
+	if (data->parser_ctx == NULL) {
+		if (!index_mail_parse_headers(mail))
+			return NULL;
+	}
+	if (!index_mail_parse_body(mail))
 		return NULL;
 
         cache_parts(mail);
@@ -314,17 +318,20 @@
 {
 	struct index_mail_data *data = &mail->data;
 
-	if ((mail->wanted_fields & MAIL_FETCH_MESSAGE_PARTS) != 0)
-		(void)get_parts(&mail->mail);
-
-	if (data->parts == NULL)
-		data->parts = get_cached_parts(mail);
+	if (data->parts == NULL) {
+		if ((mail->wanted_fields & MAIL_FETCH_MESSAGE_PARTS) != 0)
+			(void)get_parts(&mail->mail);
+		else
+			data->parts = get_cached_parts(mail);
+	}
 
 	if (data->parts != NULL) {
 		data->hdr_size = data->parts->header_size;
 		data->body_size = data->parts->body_size;
 		data->hdr_size_set = TRUE;
 		data->body_size_set = TRUE;
+		data->size = data->hdr_size.virtual_size +
+			data->body_size.virtual_size;
 	}
 
 	return data->parts != NULL;
@@ -377,7 +384,7 @@
 {
 	struct index_mail *mail = (struct index_mail *) _mail;
 	struct index_mail_data *data = &mail->data;
-	uoff_t hdr_size, body_size, hdr_phys_size;
+	struct message_size hdr_size, body_size;
 
 	if (data->size != (uoff_t)-1)
 		return data->size;
@@ -388,58 +395,68 @@
 			return data->size;
 	}
 
-	if (!get_msgpart_sizes(mail)) {
-		/* this gives us header size for free */
-		if (data->parse_header)
-			index_mail_parse_headers(mail, FALSE);
-	}
-
-	hdr_size = data->hdr_size_set ?
-		data->hdr_size.virtual_size : (uoff_t)-1;
-	body_size = data->body_size_set ?
-		data->body_size.virtual_size : (uoff_t)-1;
-
-	if (body_size != (uoff_t)-1 && hdr_size != (uoff_t)-1) {
-		data->size = hdr_size + body_size;
+	if (get_msgpart_sizes(mail))
 		return data->size;
-	}
 
 	/* maybe it's binary */
 	get_binary_sizes(mail);
-	if (data->hdr_size_set && hdr_size == (uoff_t)-1)
-		hdr_size = data->hdr_size.virtual_size;
-	if (data->body_size_set && body_size == (uoff_t)-1)
-		body_size = data->body_size.virtual_size;
-
-	if (body_size != (uoff_t)-1 && hdr_size != (uoff_t)-1) {
-		data->size = hdr_size + body_size;
+	if (data->hdr_size_set && data->body_size_set) {
+		data->size = data->hdr_size.virtual_size +
+			data->body_size.virtual_size;
 		return data->size;
 	}
 
-	/* have to parse, slow.. */
-	hdr_phys_size = hdr_size != (uoff_t)-1 && data->hdr_size_set ?
-		data->hdr_size.physical_size : (uoff_t)-1;
-	if (!index_mail_open_stream(mail, hdr_phys_size != (uoff_t)-1 ?
-				    hdr_phys_size : 0))
+	/* do it the slow way */
+	if (_mail->get_stream(_mail, &hdr_size, &body_size) == NULL)
 		return (uoff_t)-1;
 
-	if (hdr_phys_size == (uoff_t)-1) {
-		message_get_header_size(data->stream, &data->hdr_size, NULL);
-		hdr_size = data->hdr_size.virtual_size;
-		data->hdr_size_set = TRUE;
+	return data->size;
+}
+
+static int index_mail_parse_body(struct index_mail *mail)
+{
+	struct index_mail_data *data = &mail->data;
+        enum mail_index_record_flag index_flags;
+
+	i_assert(data->parts == NULL);
+	i_assert(data->parser_ctx != NULL);
+
+	i_stream_seek(data->stream, data->hdr_size.physical_size);
+
+	message_parser_parse_body(data->parser_ctx, NULL, NULL, NULL);
+	data->parts = message_parser_deinit(data->parser_ctx);
+        data->parser_ctx = NULL;
+
+	data->body_size = data->parts->body_size;
+	data->body_size_set = TRUE;
+
+	if (mail->mail.has_nuls || mail->mail.has_no_nuls)
+		return TRUE;
+
+	/* we know the NULs now, update them */
+	if ((data->parts->flags & MESSAGE_PART_FLAG_HAS_NULS) != 0) {
+		mail->mail.has_nuls = TRUE;
+		mail->mail.has_no_nuls = FALSE;
+	} else {
+		mail->mail.has_nuls = FALSE;
+		mail->mail.has_no_nuls = TRUE;
 	}
-	if (body_size == (uoff_t)-1) {
-		message_get_body_size(data->stream, &data->body_size, NULL);
-		body_size = data->body_size.virtual_size;
-		data->body_size_set = TRUE;
-	}
-	data->size = hdr_size + body_size;
+
+	if (!index_mail_cache_transaction_begin(mail))
+		return TRUE;
 
-	index_mail_cache_add_sizes(mail);
-	index_mail_cache_add(mail, MAIL_CACHE_VIRTUAL_FULL_SIZE,
-			     &data->size, sizeof(data->size));
+	index_flags = mail_cache_get_index_flags(mail->ibox->index->cache,
+						 mail->data.rec);
+	if (mail->mail.has_nuls)
+		index_flags |= MAIL_INDEX_FLAG_HAS_NULS;
+	else
+		index_flags |= MAIL_INDEX_FLAG_HAS_NO_NULS;
 
-	return data->size;
+	if (!mail_cache_update_index_flags(mail->ibox->index->cache,
+					   mail->data.rec, index_flags))
+		return FALSE;
+
+	return TRUE;
 }
 
 static struct istream *get_stream(struct mail *_mail,
@@ -459,9 +476,8 @@
 
 	if (hdr_size != NULL) {
 		if (!data->hdr_size_set) {
-			message_get_header_size(data->stream, &data->hdr_size,
-						NULL);
-			data->hdr_size_set = TRUE;
+			if (!index_mail_parse_headers(mail))
+				return NULL;
 		}
 
 		*hdr_size = data->hdr_size;
@@ -469,22 +485,42 @@
 
 	if (body_size != NULL) {
 		if (!data->body_size_set) {
-			i_stream_seek(data->stream,
-				      data->hdr_size.physical_size);
-
-			message_get_body_size(data->stream, &data->body_size,
-					      NULL);
-			data->body_size_set = TRUE;
+			if (!index_mail_parse_body(mail))
+				return NULL;
 		}
 
 		*body_size = data->body_size;
 	}
 
-	index_mail_cache_add_sizes(mail);
+	if (data->hdr_size_set && data->body_size_set) {
+		data->size = data->hdr_size.virtual_size +
+			data->body_size.virtual_size;
+		if (data->parts->children != NULL) {
+			/* cache the message parts only if this is a
+			   multipart message. it's pretty useless otherwise. */
+			cache_parts(mail);
+		} else {
+			index_mail_cache_add_sizes(mail);
+			index_mail_cache_add(mail, MAIL_CACHE_VIRTUAL_FULL_SIZE,
+					     &data->size, sizeof(data->size));
+		}
+	} else {
+		index_mail_cache_add_sizes(mail);
+	}
+
 	i_stream_seek(data->stream, 0);
 	return data->stream;
 }
 
+static void parse_bodystructure_header(struct message_part *part,
+				       struct message_header_line *hdr,
+				       void *context)
+{
+	pool_t pool = context;
+
+	imap_bodystructure_parse_header(pool, part, hdr);
+}
+
 static const char *get_special(struct mail *_mail, enum mail_fetch_field field)
 {
 	struct index_mail *mail = (struct index_mail *) _mail;
@@ -529,7 +565,18 @@
 
 		if (!data->bodystructure_header_parsed) {
 			data->bodystructure_header_want = TRUE;
-			if (!index_mail_parse_headers(mail, FALSE))
+			if (!index_mail_parse_headers(mail))
+				return NULL;
+		}
+
+		if (data->parts != NULL) {
+			i_assert(data->parts->next == NULL);
+			message_parse_from_parts(data->parts->children,
+						 data->stream,
+						 parse_bodystructure_header,
+						 mail->pool);
+		} else {
+			if (!index_mail_parse_body(mail))
 				return NULL;
 		}
 
--- a/src/lib-storage/index/index-mail.h	Thu Aug 21 02:26:37 2003 +0300
+++ b/src/lib-storage/index/index-mail.h	Thu Aug 21 03:04:11 2003 +0300
@@ -29,7 +29,8 @@
 	unsigned int idx_seq;
 
 	struct istream *stream;
-        struct message_size hdr_size, body_size;
+	struct message_size hdr_size, body_size;
+	struct message_parser_ctx *parser_ctx;
 
 	unsigned int parse_header:1;
 	unsigned int bodystructure_header_want:1;
@@ -76,7 +77,7 @@
 			  const void *data, size_t size);
 
 int index_mail_open_stream(struct index_mail *mail, uoff_t position);
-int index_mail_parse_headers(struct index_mail *mail, int get_parts);
+int index_mail_parse_headers(struct index_mail *mail);
 
 void index_mail_headers_init(struct index_mail *mail);
 void index_mail_headers_init_next(struct index_mail *mail);