changeset 21633:3a88d30ad000

lib-imap: imap-bodystructure: Moved message_part_data header parsing and query functions to their own module in lib-mail. Moved code from imap-bodystructure and imap-envelope.
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Sun, 23 Oct 2016 20:05:33 +0200
parents eff89c5ea738
children e2071511ef6d
files src/lib-imap/imap-bodystructure.c src/lib-imap/imap-bodystructure.h src/lib-imap/imap-envelope.c src/lib-imap/imap-envelope.h src/lib-imap/test-imap-bodystructure.c src/lib-mail/Makefile.am src/lib-mail/message-part-data.c src/lib-mail/message-part-data.h src/lib-storage/index/index-mail-headers.c src/lib-storage/index/index-mail.c
diffstat 10 files changed, 543 insertions(+), 496 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-imap/imap-bodystructure.c	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-imap/imap-bodystructure.c	Sun Oct 23 20:05:33 2016 +0200
@@ -4,6 +4,7 @@
 #include "buffer.h"
 #include "istream.h"
 #include "str.h"
+#include "message-part-data.h"
 #include "message-parser.h"
 #include "rfc822-parser.h"
 #include "rfc2231-parser.h"
@@ -12,268 +13,8 @@
 #include "imap-envelope.h"
 #include "imap-bodystructure.h"
 
-#define DEFAULT_CHARSET "us-ascii"
-
-#define EMPTY_BODYSTRUCTURE \
-        "(\"text\" \"plain\" (\"charset\" \""DEFAULT_CHARSET"\") NIL NIL \"7bit\" 0 0)"
-
-static void
-parse_mime_parameters(struct rfc822_parser_context *parser,
-	pool_t pool, const struct message_part_param **params_r,
-	unsigned int *params_count_r)
-{
-	const char *const *results;
-	struct message_part_param *params;
-	unsigned int params_count, i;
-
-	rfc2231_parse(parser, &results);
-
-	params_count = str_array_length(results);
-	i_assert((params_count % 2) == 0);
-	params_count /= 2;
-
-	if (params_count > 0) {
-		params = p_new(pool, struct message_part_param, params_count);
-		for (i = 0; i < params_count; i++) {
-			params[i].name = p_strdup(pool, results[i*2+0]);
-			params[i].value = p_strdup(pool, results[i*2+1]);
-		}
-		*params_r = params;
-	}
-
-	*params_count_r = params_count;
-}
-
-static void
-parse_content_type(struct message_part_data *data,
-	pool_t pool, struct message_header_line *hdr)
-{
-	struct rfc822_parser_context parser;
-	string_t *str;
-	const char *value;
-	unsigned int i;
-	int ret;
-
-	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
-	rfc822_skip_lwsp(&parser);
-
-	str = t_str_new(256);
-	ret = rfc822_parse_content_type(&parser, str);
-
-	/* Save content type and subtype */
-	value = str_c(str);
-	for (i = 0; value[i] != '\0'; i++) {
-		if (value[i] == '/') {
-			data->content_subtype = p_strdup(pool, value + i+1);
-			break;
-		}
-	}
-	str_truncate(str, i);
-	data->content_type = p_strdup(pool, str_c(str));
-
-	if (ret < 0) {
-		/* Content-Type is broken, but we wanted to get it as well as
-		   we could. Don't try to read the parameters anymore though.
-
-		   We don't completely ignore a broken Content-Type, because
-		   then it would be written as text/plain. This would cause a
-		   mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */
-		return;
-	}
-
-	parse_mime_parameters(&parser, pool,
-		&data->content_type_params,
-		&data->content_type_params_count);
-}
-
-static void
-parse_content_transfer_encoding(struct message_part_data *data,
-	pool_t pool, struct message_header_line *hdr)
-{
-	struct rfc822_parser_context parser;
-	string_t *str;
-
-	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
-	rfc822_skip_lwsp(&parser);
-
-	str = t_str_new(256);
-	if (rfc822_parse_mime_token(&parser, str) >= 0 &&
-	    rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) {
-		data->content_transfer_encoding =
-			p_strdup(pool, str_c(str));
-	}
-}
-
-static void
-parse_content_disposition(struct message_part_data *data,
-	pool_t pool, struct message_header_line *hdr)
-{
-	struct rfc822_parser_context parser;
-	string_t *str;
-
-	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
-	rfc822_skip_lwsp(&parser);
-
-	str = t_str_new(256);
-	if (rfc822_parse_mime_token(&parser, str) < 0)
-		return;
-	data->content_disposition = p_strdup(pool, str_c(str));
-
-	parse_mime_parameters(&parser, pool,
-		&data->content_disposition_params,
-		&data->content_disposition_params_count);
-}
-
-static void
-parse_content_language(struct message_part_data *data,
-	pool_t pool, const unsigned char *value, size_t value_len)
-{
-	struct rfc822_parser_context parser;
-	ARRAY_TYPE(const_string) langs;
-	string_t *str;
-
-	/* Language-Header = "Content-Language" ":" 1#Language-tag
-	   Language-Tag = Primary-tag *( "-" Subtag )
-	   Primary-tag = 1*8ALPHA
-	   Subtag = 1*8ALPHA */
-
-	rfc822_parser_init(&parser, value, value_len, NULL);
-
-	t_array_init(&langs, 16);
-	str = t_str_new(128);
-
-	rfc822_skip_lwsp(&parser);
-	while (rfc822_parse_atom(&parser, str) >= 0) {
-		const char *lang = p_strdup(pool, str_c(str));
-
-		array_append(&langs, &lang, 1);
-		str_truncate(str, 0);
-
-		if (parser.data == parser.end || *parser.data != ',')
-			break;
-		parser.data++;
-		rfc822_skip_lwsp(&parser);
-	}
-
-	if (array_count(&langs) > 0) {
-		array_append_zero(&langs);
-		data->content_language =
-			p_strarray_dup(pool, array_idx(&langs, 0));
-	}
-}
-
-static void
-parse_content_header(struct message_part_data *data,
-	pool_t pool, struct message_header_line *hdr)
-{
-	const char *name = hdr->name + strlen("Content-");
-	const char *value;
-
-	if (hdr->continues) {
-		hdr->use_full_value = TRUE;
-		return;
-	}
-
-	value = t_strndup(hdr->full_value, hdr->full_value_len);
-
-	switch (*name) {
-	case 'i':
-	case 'I':
-		if (strcasecmp(name, "ID") == 0 && data->content_id == NULL)
-			data->content_id = p_strdup(pool, value);
-		break;
-
-	case 'm':
-	case 'M':
-		if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL)
-			data->content_md5 = p_strdup(pool, value);
-		break;
-
-	case 't':
-	case 'T':
-		if (strcasecmp(name, "Type") == 0 && data->content_type == NULL)
-			parse_content_type(data, pool, hdr);
-		else if (strcasecmp(name, "Transfer-Encoding") == 0 &&
-			 data->content_transfer_encoding == NULL)
-			parse_content_transfer_encoding(data, pool, hdr);
-		break;
-
-	case 'l':
-	case 'L':
-		if (strcasecmp(name, "Language") == 0 &&
-		    data->content_language == NULL) {
-			parse_content_language(data, pool,
-				hdr->full_value, hdr->full_value_len);
-		} else if (strcasecmp(name, "Location") == 0 &&
-			   data->content_location == NULL) {
-			data->content_location = p_strdup(pool, value);
-		}
-		break;
-
-	case 'd':
-	case 'D':
-		if (strcasecmp(name, "Description") == 0 &&
-		    data->content_description == NULL)
-			data->content_description = p_strdup(pool, value);
-		else if (strcasecmp(name, "Disposition") == 0 &&
-			 data->content_disposition_params == NULL)
-			parse_content_disposition(data, pool, hdr);
-		break;
-	}
-}
-
-void message_part_data_parse_from_header(pool_t pool,
-	struct message_part *part,
-	struct message_header_line *hdr)
-{
-	struct message_part_data *part_data;
-	struct message_part_envelope_data *envelope;
-	bool parent_rfc822;
-
-	if (hdr == NULL) {
-		if (part->data == NULL) {
-			/* no Content-* headers. add an empty context
-			   structure anyway. */
-			part->data = part_data =
-				p_new(pool, struct message_part_data, 1);
-		} else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
-			/* If there was no Mime-Version, forget all
-			   the Content-stuff */
-			part_data = part->data;
-			envelope = part_data->envelope;
-
-			i_zero(part_data);
-			part_data->envelope = envelope;
-		}
-		return;
-	}
-
-	if (hdr->eoh)
-		return;
-
-	parent_rfc822 = part->parent != NULL &&
-		(part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0;
-	if (!parent_rfc822 && strncasecmp(hdr->name, "Content-", 8) != 0)
-		return;
-
-	if (part->data == NULL) {
-		/* initialize message part data */
-		part->data = part_data =
-			p_new(pool, struct message_part_data, 1);
-	}
-	part_data = part->data;
-
-	if (strncasecmp(hdr->name, "Content-", 8) == 0) {
-		T_BEGIN {
-			parse_content_header(part_data, pool, hdr);
-		} T_END;
-	}
-
-	if (parent_rfc822) {
-		/* message/rfc822, we need the envelope */
-		message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr);
-	}
-}
+#define EMPTY_BODYSTRUCTURE "(\"text\" \"plain\" " \
+	"(\"charset\" \""MESSAGE_PART_DEFAULT_CHARSET"\") NIL NIL \"7bit\" 0 0)"
 
 static void
 params_write(const struct message_part_param *params,
@@ -303,7 +44,8 @@
 	if (default_charset && !seen_charset) {
 		if (i > 0)
 			str_append_c(str, ' ');
-		str_append(str, "\"charset\" \""DEFAULT_CHARSET"\"");
+		str_append(str, "\"charset\" "
+			"\""MESSAGE_PART_DEFAULT_CHARSET"\"");
 	}
 	str_append_c(str, ')');
 }
@@ -473,48 +215,6 @@
 	part_write_bodystructure_common(data, str);
 }
 
-bool message_part_data_is_plain_7bit(const struct message_part *part)
-{
-	const struct message_part_data *data = part->data;
-
-	i_assert(part->parent == NULL);
-
-	/* if content-type is text/xxx we don't have to check any
-	   multipart stuff */
-	if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0)
-		return FALSE;
-	if (part->next != NULL || part->children != NULL)
-		return FALSE; /* shouldn't happen normally.. */
-
-	/* must be text/plain */
-	if (data->content_subtype != NULL &&
-	    strcasecmp(data->content_subtype, "plain") != 0)
-		return FALSE;
-
-	/* only allowed parameter is charset=us-ascii, which is also default */
-	if (data->content_type_params_count > 0 &&
-	    (strcasecmp(data->content_type_params[0].name, "charset") != 0 ||
-	     strcasecmp(data->content_type_params[0].value, DEFAULT_CHARSET) != 0))
-		return FALSE;
-
-	if (data->content_id != NULL ||
-	    data->content_description != NULL)
-		return FALSE;
-
-	if (data->content_transfer_encoding != NULL &&
-	    strcasecmp(data->content_transfer_encoding, "7bit") != 0)
-		return FALSE;
-
-	/* BODYSTRUCTURE checks: */
-	if (data->content_md5 != NULL ||
-	    data->content_disposition != NULL ||
-	    data->content_language != NULL ||
-	    data->content_location != NULL)
-		return FALSE;
-
-	return TRUE;
-}
-
 void imap_bodystructure_write(const struct message_part *part,
 			      string_t *dest, bool extended)
 {
--- a/src/lib-imap/imap-bodystructure.h	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-imap/imap-bodystructure.h	Sun Oct 23 20:05:33 2016 +0200
@@ -1,42 +1,9 @@
 #ifndef IMAP_BODYSTRUCTURE_H
 #define IMAP_BODYSTRUCTURE_H
 
-struct message_part_param {
-	const char *name;
-	const char *value;
-};
-
-struct message_part_data {
-	const char *content_type, *content_subtype;
-	const struct message_part_param *content_type_params;
-	unsigned int content_type_params_count;
-
-	const char *content_transfer_encoding;
-	const char *content_id;
-	const char *content_description;
-	const char *content_disposition;
-	const struct message_part_param *content_disposition_params;
-	unsigned int content_disposition_params_count;
-	const char *content_md5;
-	const char *const *content_language;
-	const char *content_location;
-
-	struct message_part_envelope_data *envelope;
-};
-
 struct message_part;
 struct message_header_line;
 
-/* Parse a single header. Note that this modifies part->data. */
-void message_part_data_parse_from_header(pool_t pool,
-	struct message_part *part,
-	struct message_header_line *hdr);
-
-/* Returns TRUE if this message part has content-type "text/plain",
-   chaset "us-ascii" and content-tranfer-encoding "7bit" */
-bool message_part_data_is_plain_7bit(const struct message_part *part)
-	ATTR_PURE;
-
 /* Write a BODY/BODYSTRUCTURE from given message_part. The message_part->data
    field must be set. part->body_size.virtual_size and .lines are also used
    for writing it. */
--- a/src/lib-imap/imap-envelope.c	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-imap/imap-envelope.c	Sun Oct 23 20:05:33 2016 +0200
@@ -4,169 +4,18 @@
 #include "istream.h"
 #include "str.h"
 #include "message-address.h"
+#include "message-part-data.h"
 #include "message-parser.h"
 #include "imap-parser.h"
 #include "imap-envelope.h"
 #include "imap-quote.h"
 
-struct message_part_envelope_data {
-	const char *date, *subject;
-	struct message_address *from, *sender, *reply_to;
-	struct message_address *to, *cc, *bcc;
-
-	const char *in_reply_to, *message_id;
-};
-
-enum envelope_field {
-	ENVELOPE_FIELD_DATE = 0,
-	ENVELOPE_FIELD_SUBJECT,
-	ENVELOPE_FIELD_FROM,
-	ENVELOPE_FIELD_SENDER,
-	ENVELOPE_FIELD_REPLY_TO,
-	ENVELOPE_FIELD_TO,
-	ENVELOPE_FIELD_CC,
-	ENVELOPE_FIELD_BCC,
-	ENVELOPE_FIELD_IN_REPLY_TO,
-	ENVELOPE_FIELD_MESSAGE_ID,
-
-	ENVELOPE_FIELD_UNKNOWN
-};
-
 const char *imap_envelope_headers[] = {
 	"Date", "Subject", "From", "Sender", "Reply-To",
 	"To", "Cc", "Bcc", "In-Reply-To", "Message-ID",
 	NULL
 };
 
-static enum envelope_field
-envelope_get_field(const char *name)
-{
-	switch (*name) {
-	case 'B':
-	case 'b':
-		if (strcasecmp(name, "Bcc") == 0)
-			return ENVELOPE_FIELD_BCC;
-		break;
-	case 'C':
-	case 'c':
-		if (strcasecmp(name, "Cc") == 0)
-			return ENVELOPE_FIELD_CC;
-		break;
-	case 'D':
-	case 'd':
-		if (strcasecmp(name, "Date") == 0)
-			return ENVELOPE_FIELD_DATE;
-		break;
-	case 'F':
-	case 'f':
-		if (strcasecmp(name, "From") == 0)
-			return ENVELOPE_FIELD_FROM;
-		break;
-	case 'I':
-	case 'i':
-		if (strcasecmp(name, "In-reply-to") == 0)
-			return ENVELOPE_FIELD_IN_REPLY_TO;
-		break;
-	case 'M':
-	case 'm':
-		if (strcasecmp(name, "Message-id") == 0)
-			return ENVELOPE_FIELD_MESSAGE_ID;
-		break;
-	case 'R':
-	case 'r':
-		if (strcasecmp(name, "Reply-to") == 0)
-			return ENVELOPE_FIELD_REPLY_TO;
-		break;
-	case 'S':
-	case 's':
-		if (strcasecmp(name, "Subject") == 0)
-			return ENVELOPE_FIELD_SUBJECT;
-		if (strcasecmp(name, "Sender") == 0)
-			return ENVELOPE_FIELD_SENDER;
-		break;
-	case 'T':
-	case 't':
-		if (strcasecmp(name, "To") == 0)
-			return ENVELOPE_FIELD_TO;
-		break;
-	}
-
-	return ENVELOPE_FIELD_UNKNOWN;
-}
-
-void message_part_envelope_parse_from_header(pool_t pool,
-	struct message_part_envelope_data **data,
-	struct message_header_line *hdr)
-{
-	struct message_part_envelope_data *d;
-	enum envelope_field field;
-	struct message_address **addr_p;
-	const char **str_p;
-
-	if (*data == NULL) {
-		*data = p_new(pool, struct message_part_envelope_data, 1);
-	}
-
-	if (hdr == NULL)
-		return;
-	field = envelope_get_field(hdr->name);
-	if (field == ENVELOPE_FIELD_UNKNOWN)
-		return;
-
-	if (hdr->continues) {
-		/* wait for full value */
-		hdr->use_full_value = TRUE;
-		return;
-	}
-
-	d = *data;
-	addr_p = NULL; str_p = NULL;
-	switch (field) {
-	case ENVELOPE_FIELD_DATE:
-		str_p = &d->date;
-		break;
-	case ENVELOPE_FIELD_SUBJECT:
-		str_p = &d->subject;
-		break;
-	case ENVELOPE_FIELD_MESSAGE_ID:
-		str_p = &d->message_id;
-		break;
-	case ENVELOPE_FIELD_IN_REPLY_TO:
-		str_p = &d->in_reply_to;
-		break;
-
-	case ENVELOPE_FIELD_CC:
-		addr_p = &d->cc;
-		break;
-	case ENVELOPE_FIELD_BCC:
-		addr_p = &d->bcc;
-		break;
-	case ENVELOPE_FIELD_FROM:
-		addr_p = &d->from;
-		break;
-	case ENVELOPE_FIELD_SENDER:
-		addr_p = &d->sender;
-		break;
-	case ENVELOPE_FIELD_TO:
-		addr_p = &d->to;
-		break;
-	case ENVELOPE_FIELD_REPLY_TO:
-		addr_p = &d->reply_to;
-		break;
-	case ENVELOPE_FIELD_UNKNOWN:
-		i_unreached();
-	}
-
-	if (addr_p != NULL) {
-		*addr_p = message_address_parse(pool, hdr->full_value,
-						hdr->full_value_len,
-						UINT_MAX, TRUE);
-	} else if (str_p != NULL) {
-		*str_p = p_strndup(pool,
-			hdr->full_value, hdr->full_value_len);
-	}
-}
-
 static void imap_write_address(string_t *str, struct message_address *addr)
 {
 	if (addr == NULL) {
--- a/src/lib-imap/imap-envelope.h	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-imap/imap-envelope.h	Sun Oct 23 20:05:33 2016 +0200
@@ -2,16 +2,10 @@
 #define IMAP_ENVELOPE_H
 
 struct imap_arg;
-struct message_header_line;
 struct message_part_envelope_data;
 
 extern const char *imap_envelope_headers[];
 
-/* Update envelope data based from given header field */
-void message_part_envelope_parse_from_header(pool_t pool,
-	struct message_part_envelope_data **_data,
-	struct message_header_line *hdr);
-
 /* Write envelope to given string */
 void imap_envelope_write_part_data(struct message_part_envelope_data *data,
 				   string_t *str);
--- a/src/lib-imap/test-imap-bodystructure.c	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-imap/test-imap-bodystructure.c	Sun Oct 23 20:05:33 2016 +0200
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "istream.h"
 #include "str.h"
+#include "message-part-data.h"
 #include "message-parser.h"
 #include "imap-bodystructure.h"
 #include "test-common.h"
--- a/src/lib-mail/Makefile.am	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-mail/Makefile.am	Sun Oct 23 20:05:33 2016 +0200
@@ -27,6 +27,7 @@
 	message-id.c \
 	message-parser.c \
 	message-part.c \
+	message-part-data.c \
 	message-part-serialize.c \
 	message-search.c \
 	message-size.c \
@@ -63,6 +64,7 @@
 	message-id.h \
 	message-parser.h \
 	message-part.h \
+	message-part-data.h \
 	message-part-serialize.h \
 	message-search.h \
 	message-size.h \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-part-data.c	Sun Oct 23 20:05:33 2016 +0200
@@ -0,0 +1,468 @@
+/* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "str.h"
+#include "array.h"
+#include "rfc822-parser.h"
+#include "rfc2231-parser.h"
+#include "message-address.h"
+#include "message-header-parser.h"
+
+#include "message-part-data.h"
+
+/*
+ * Query functions
+ */
+
+bool message_part_data_is_plain_7bit(const struct message_part *part)
+{
+	const struct message_part_data *data = part->data;
+
+	i_assert(part->parent == NULL);
+
+	/* if content-type is text/xxx we don't have to check any
+	   multipart stuff */
+	if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0)
+		return FALSE;
+	if (part->next != NULL || part->children != NULL)
+		return FALSE; /* shouldn't happen normally.. */
+
+	/* must be text/plain */
+	if (data->content_subtype != NULL &&
+	    strcasecmp(data->content_subtype, "plain") != 0)
+		return FALSE;
+
+	/* only allowed parameter is charset=us-ascii, which is also default */
+	if (data->content_type_params_count > 0 &&
+	    (strcasecmp(data->content_type_params[0].name, "charset") != 0 ||
+	     strcasecmp(data->content_type_params[0].value,
+				MESSAGE_PART_DEFAULT_CHARSET) != 0))
+		return FALSE;
+
+	if (data->content_id != NULL ||
+	    data->content_description != NULL)
+		return FALSE;
+
+	if (data->content_transfer_encoding != NULL &&
+	    strcasecmp(data->content_transfer_encoding, "7bit") != 0)
+		return FALSE;
+
+	/* BODYSTRUCTURE checks: */
+	if (data->content_md5 != NULL ||
+	    data->content_disposition != NULL ||
+	    data->content_language != NULL ||
+	    data->content_location != NULL)
+		return FALSE;
+
+	return TRUE;
+}
+
+/*
+ * Header parsing
+ */
+
+/* Message part envelope */
+
+enum envelope_field {
+	ENVELOPE_FIELD_DATE = 0,
+	ENVELOPE_FIELD_SUBJECT,
+	ENVELOPE_FIELD_FROM,
+	ENVELOPE_FIELD_SENDER,
+	ENVELOPE_FIELD_REPLY_TO,
+	ENVELOPE_FIELD_TO,
+	ENVELOPE_FIELD_CC,
+	ENVELOPE_FIELD_BCC,
+	ENVELOPE_FIELD_IN_REPLY_TO,
+	ENVELOPE_FIELD_MESSAGE_ID,
+
+	ENVELOPE_FIELD_UNKNOWN
+};
+
+static enum envelope_field
+envelope_get_field(const char *name)
+{
+	switch (*name) {
+	case 'B':
+	case 'b':
+		if (strcasecmp(name, "Bcc") == 0)
+			return ENVELOPE_FIELD_BCC;
+		break;
+	case 'C':
+	case 'c':
+		if (strcasecmp(name, "Cc") == 0)
+			return ENVELOPE_FIELD_CC;
+		break;
+	case 'D':
+	case 'd':
+		if (strcasecmp(name, "Date") == 0)
+			return ENVELOPE_FIELD_DATE;
+		break;
+	case 'F':
+	case 'f':
+		if (strcasecmp(name, "From") == 0)
+			return ENVELOPE_FIELD_FROM;
+		break;
+	case 'I':
+	case 'i':
+		if (strcasecmp(name, "In-reply-to") == 0)
+			return ENVELOPE_FIELD_IN_REPLY_TO;
+		break;
+	case 'M':
+	case 'm':
+		if (strcasecmp(name, "Message-id") == 0)
+			return ENVELOPE_FIELD_MESSAGE_ID;
+		break;
+	case 'R':
+	case 'r':
+		if (strcasecmp(name, "Reply-to") == 0)
+			return ENVELOPE_FIELD_REPLY_TO;
+		break;
+	case 'S':
+	case 's':
+		if (strcasecmp(name, "Subject") == 0)
+			return ENVELOPE_FIELD_SUBJECT;
+		if (strcasecmp(name, "Sender") == 0)
+			return ENVELOPE_FIELD_SENDER;
+		break;
+	case 'T':
+	case 't':
+		if (strcasecmp(name, "To") == 0)
+			return ENVELOPE_FIELD_TO;
+		break;
+	}
+
+	return ENVELOPE_FIELD_UNKNOWN;
+}
+
+void message_part_envelope_parse_from_header(pool_t pool,
+	struct message_part_envelope_data **data,
+	struct message_header_line *hdr)
+{
+	struct message_part_envelope_data *d;
+	enum envelope_field field;
+	struct message_address **addr_p;
+	const char **str_p;
+
+	if (*data == NULL) {
+		*data = p_new(pool, struct message_part_envelope_data, 1);
+	}
+
+	if (hdr == NULL)
+		return;
+	field = envelope_get_field(hdr->name);
+	if (field == ENVELOPE_FIELD_UNKNOWN)
+		return;
+
+	if (hdr->continues) {
+		/* wait for full value */
+		hdr->use_full_value = TRUE;
+		return;
+	}
+
+	d = *data;
+	addr_p = NULL; str_p = NULL;
+	switch (field) {
+	case ENVELOPE_FIELD_DATE:
+		str_p = &d->date;
+		break;
+	case ENVELOPE_FIELD_SUBJECT:
+		str_p = &d->subject;
+		break;
+	case ENVELOPE_FIELD_MESSAGE_ID:
+		str_p = &d->message_id;
+		break;
+	case ENVELOPE_FIELD_IN_REPLY_TO:
+		str_p = &d->in_reply_to;
+		break;
+
+	case ENVELOPE_FIELD_CC:
+		addr_p = &d->cc;
+		break;
+	case ENVELOPE_FIELD_BCC:
+		addr_p = &d->bcc;
+		break;
+	case ENVELOPE_FIELD_FROM:
+		addr_p = &d->from;
+		break;
+	case ENVELOPE_FIELD_SENDER:
+		addr_p = &d->sender;
+		break;
+	case ENVELOPE_FIELD_TO:
+		addr_p = &d->to;
+		break;
+	case ENVELOPE_FIELD_REPLY_TO:
+		addr_p = &d->reply_to;
+		break;
+	case ENVELOPE_FIELD_UNKNOWN:
+		i_unreached();
+	}
+
+	if (addr_p != NULL) {
+		*addr_p = message_address_parse(pool, hdr->full_value,
+						hdr->full_value_len,
+						UINT_MAX, TRUE);
+	} else if (str_p != NULL) {
+		*str_p = p_strndup(pool,
+			hdr->full_value, hdr->full_value_len);
+	}
+}
+
+/* Message part data */
+
+static void
+parse_mime_parameters(struct rfc822_parser_context *parser,
+	pool_t pool, const struct message_part_param **params_r,
+	unsigned int *params_count_r)
+{
+	const char *const *results;
+	struct message_part_param *params;
+	unsigned int params_count, i;
+
+	rfc2231_parse(parser, &results);
+
+	params_count = str_array_length(results);
+	i_assert((params_count % 2) == 0);
+	params_count /= 2;
+
+	if (params_count > 0) {
+		params = p_new(pool, struct message_part_param, params_count);
+		for (i = 0; i < params_count; i++) {
+			params[i].name = p_strdup(pool, results[i*2+0]);
+			params[i].value = p_strdup(pool, results[i*2+1]);
+		}
+		*params_r = params;
+	}
+
+	*params_count_r = params_count;
+}
+
+static void
+parse_content_type(struct message_part_data *data,
+	pool_t pool, struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	string_t *str;
+	const char *value;
+	unsigned int i;
+	int ret;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	str = t_str_new(256);
+	ret = rfc822_parse_content_type(&parser, str);
+
+	/* Save content type and subtype */
+	value = str_c(str);
+	for (i = 0; value[i] != '\0'; i++) {
+		if (value[i] == '/') {
+			data->content_subtype = p_strdup(pool, value + i+1);
+			break;
+		}
+	}
+	str_truncate(str, i);
+	data->content_type = p_strdup(pool, str_c(str));
+
+	if (ret < 0) {
+		/* Content-Type is broken, but we wanted to get it as well as
+		   we could. Don't try to read the parameters anymore though.
+
+		   We don't completely ignore a broken Content-Type, because
+		   then it would be written as text/plain. This would cause a
+		   mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */
+		return;
+	}
+
+	parse_mime_parameters(&parser, pool,
+		&data->content_type_params,
+		&data->content_type_params_count);
+}
+
+static void
+parse_content_transfer_encoding(struct message_part_data *data,
+	pool_t pool, struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	string_t *str;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	str = t_str_new(256);
+	if (rfc822_parse_mime_token(&parser, str) >= 0 &&
+	    rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) {
+		data->content_transfer_encoding =
+			p_strdup(pool, str_c(str));
+	}
+}
+
+static void
+parse_content_disposition(struct message_part_data *data,
+	pool_t pool, struct message_header_line *hdr)
+{
+	struct rfc822_parser_context parser;
+	string_t *str;
+
+	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
+	rfc822_skip_lwsp(&parser);
+
+	str = t_str_new(256);
+	if (rfc822_parse_mime_token(&parser, str) < 0)
+		return;
+	data->content_disposition = p_strdup(pool, str_c(str));
+
+	parse_mime_parameters(&parser, pool,
+		&data->content_disposition_params,
+		&data->content_disposition_params_count);
+}
+
+static void
+parse_content_language(struct message_part_data *data,
+	pool_t pool, const unsigned char *value, size_t value_len)
+{
+	struct rfc822_parser_context parser;
+	ARRAY_TYPE(const_string) langs;
+	string_t *str;
+
+	/* Language-Header = "Content-Language" ":" 1#Language-tag
+	   Language-Tag = Primary-tag *( "-" Subtag )
+	   Primary-tag = 1*8ALPHA
+	   Subtag = 1*8ALPHA */
+
+	rfc822_parser_init(&parser, value, value_len, NULL);
+
+	t_array_init(&langs, 16);
+	str = t_str_new(128);
+
+	rfc822_skip_lwsp(&parser);
+	while (rfc822_parse_atom(&parser, str) >= 0) {
+		const char *lang = p_strdup(pool, str_c(str));
+
+		array_append(&langs, &lang, 1);
+		str_truncate(str, 0);
+
+		if (parser.data == parser.end || *parser.data != ',')
+			break;
+		parser.data++;
+		rfc822_skip_lwsp(&parser);
+	}
+
+	if (array_count(&langs) > 0) {
+		array_append_zero(&langs);
+		data->content_language =
+			p_strarray_dup(pool, array_idx(&langs, 0));
+	}
+}
+
+static void
+parse_content_header(struct message_part_data *data,
+	pool_t pool, struct message_header_line *hdr)
+{
+	const char *name = hdr->name + strlen("Content-");
+	const char *value;
+
+	if (hdr->continues) {
+		hdr->use_full_value = TRUE;
+		return;
+	}
+
+	value = t_strndup(hdr->full_value, hdr->full_value_len);
+
+	switch (*name) {
+	case 'i':
+	case 'I':
+		if (strcasecmp(name, "ID") == 0 && data->content_id == NULL)
+			data->content_id = p_strdup(pool, value);
+		break;
+
+	case 'm':
+	case 'M':
+		if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL)
+			data->content_md5 = p_strdup(pool, value);
+		break;
+
+	case 't':
+	case 'T':
+		if (strcasecmp(name, "Type") == 0 && data->content_type == NULL)
+			parse_content_type(data, pool, hdr);
+		else if (strcasecmp(name, "Transfer-Encoding") == 0 &&
+			 data->content_transfer_encoding == NULL)
+			parse_content_transfer_encoding(data, pool, hdr);
+		break;
+
+	case 'l':
+	case 'L':
+		if (strcasecmp(name, "Language") == 0 &&
+		    data->content_language == NULL) {
+			parse_content_language(data, pool,
+				hdr->full_value, hdr->full_value_len);
+		} else if (strcasecmp(name, "Location") == 0 &&
+			   data->content_location == NULL) {
+			data->content_location = p_strdup(pool, value);
+		}
+		break;
+
+	case 'd':
+	case 'D':
+		if (strcasecmp(name, "Description") == 0 &&
+		    data->content_description == NULL)
+			data->content_description = p_strdup(pool, value);
+		else if (strcasecmp(name, "Disposition") == 0 &&
+			 data->content_disposition_params == NULL)
+			parse_content_disposition(data, pool, hdr);
+		break;
+	}
+}
+
+void message_part_data_parse_from_header(pool_t pool,
+	struct message_part *part,
+	struct message_header_line *hdr)
+{
+	struct message_part_data *part_data;
+	struct message_part_envelope_data *envelope;
+	bool parent_rfc822;
+
+	if (hdr == NULL) {
+		if (part->data == NULL) {
+			/* no Content-* headers. add an empty context
+			   structure anyway. */
+			part->data = part_data =
+				p_new(pool, struct message_part_data, 1);
+		} else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
+			/* If there was no Mime-Version, forget all
+			   the Content-stuff */
+			part_data = part->data;
+			envelope = part_data->envelope;
+
+			i_zero(part_data);
+			part_data->envelope = envelope;
+		}
+		return;
+	}
+
+	if (hdr->eoh)
+		return;
+
+	parent_rfc822 = part->parent != NULL &&
+		(part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0;
+	if (!parent_rfc822 && strncasecmp(hdr->name, "Content-", 8) != 0)
+		return;
+
+	if (part->data == NULL) {
+		/* initialize message part data */
+		part->data = part_data =
+			p_new(pool, struct message_part_data, 1);
+	}
+	part_data = part->data;
+
+	if (strncasecmp(hdr->name, "Content-", 8) == 0) {
+		T_BEGIN {
+			parse_content_header(part_data, pool, hdr);
+		} T_END;
+	}
+
+	if (parent_rfc822) {
+		/* message/rfc822, we need the envelope */
+		message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr);
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-part-data.h	Sun Oct 23 20:05:33 2016 +0200
@@ -0,0 +1,64 @@
+#ifndef MESSAGE_PART_DATA_H
+#define MESSAGE_PART_DATA_H
+
+#include "message-part.h"
+
+#define MESSAGE_PART_DEFAULT_CHARSET "us-ascii"
+
+struct message_header_line;
+
+struct message_part_param {
+	const char *name;
+	const char *value;
+};
+
+struct message_part_envelope_data {
+	const char *date, *subject;
+	struct message_address *from, *sender, *reply_to;
+	struct message_address *to, *cc, *bcc;
+
+	const char *in_reply_to, *message_id;
+};
+
+struct message_part_data {
+	const char *content_type, *content_subtype;
+	const struct message_part_param *content_type_params;
+	unsigned int content_type_params_count;
+
+	const char *content_transfer_encoding;
+	const char *content_id;
+	const char *content_description;
+	const char *content_disposition;
+	const struct message_part_param *content_disposition_params;
+	unsigned int content_disposition_params_count;
+	const char *content_md5;
+	const char *const *content_language;
+	const char *content_location;
+
+	struct message_part_envelope_data *envelope;
+};
+
+/*
+ * Query functions
+ */
+
+/* Returns TRUE if this message part has content-type "text/plain",
+   charset "us-ascii" and content-transfer-encoding "7bit" */
+bool message_part_data_is_plain_7bit(const struct message_part *part)
+	ATTR_PURE;
+
+/*
+ * Header parsing
+ */
+
+/* Update envelope data based on the given header field */
+void message_part_envelope_parse_from_header(pool_t pool,
+	struct message_part_envelope_data **_data,
+	struct message_header_line *hdr);
+
+/* Parse a single header. Note that this modifies part->data. */
+void message_part_data_parse_from_header(pool_t pool,
+	struct message_part *part,
+	struct message_header_line *hdr);
+
+#endif
\ No newline at end of file
--- a/src/lib-storage/index/index-mail-headers.c	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-storage/index/index-mail-headers.c	Sun Oct 23 20:05:33 2016 +0200
@@ -6,6 +6,7 @@
 #include "buffer.h"
 #include "str.h"
 #include "message-date.h"
+#include "message-part-data.h"
 #include "message-parser.h"
 #include "message-header-decode.h"
 #include "istream-tee.h"
--- a/src/lib-storage/index/index-mail.c	Sun Oct 23 19:47:12 2016 +0200
+++ b/src/lib-storage/index/index-mail.c	Sun Oct 23 20:05:33 2016 +0200
@@ -8,6 +8,7 @@
 #include "hex-binary.h"
 #include "str.h"
 #include "message-date.h"
+#include "message-part-data.h"
 #include "message-part-serialize.h"
 #include "message-parser.h"
 #include "message-snippet.h"