view src/lib-mail/message-part-data.c @ 21634:e2071511ef6d

lib-imap: imap-envelope: Moved imap_envelope_headers to lib-mail/message-part-data as message_part_envelope_headers.
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Sun, 23 Oct 2016 22:29:49 +0200
parents 3a88d30ad000
children 30aacb0df12f
line wrap: on
line source

/* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "str.h"
#include "array.h"
#include "rfc822-parser.h"
#include "rfc2231-parser.h"
#include "message-address.h"
#include "message-header-parser.h"

#include "message-part-data.h"

/* NULL-terminated list of the header names handled by the envelope parsing
   in this file. The names are listed in the same order as the values of
   enum envelope_field. */
const char *message_part_envelope_headers[] = {
	"Date", "Subject", "From", "Sender", "Reply-To",
	"To", "Cc", "Bcc", "In-Reply-To", "Message-ID",
	NULL
};

/*
 * Message part data queries
 */

/* Returns TRUE if the message consists of a single part whose parsed
   Content-* data says nothing beyond the defaults: subtype "plain" (or
   none), at most one Content-Type parameter which must be
   charset=MESSAGE_PART_DEFAULT_CHARSET, transfer encoding "7bit" (or none)
   and no Content-ID, -Description, -MD5, -Disposition, -Language or
   -Location.

   Must be called for the root part (part->parent == NULL). part->data must
   be set whenever the part is a text part without siblings or children. */
bool message_part_data_is_plain_7bit(const struct message_part *part)
{
	const struct message_part_data *data = part->data;

	i_assert(part->parent == NULL);

	/* if content-type is text/xxx we don't have to check any
	   multipart stuff */
	if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0)
		return FALSE;
	if (part->next != NULL || part->children != NULL)
		return FALSE; /* shouldn't happen normally.. */

	/* everything below reads the parsed Content-* data */
	i_assert(data != NULL);

	/* must be text/plain */
	if (data->content_subtype != NULL &&
	    strcasecmp(data->content_subtype, "plain") != 0)
		return FALSE;

	/* only allowed parameter is charset=us-ascii, which is also default.
	   any additional parameter (e.g. format=flowed) makes the part
	   non-default, regardless of the order the parameters appear in. */
	if (data->content_type_params_count > 1)
		return FALSE;
	if (data->content_type_params_count == 1 &&
	    (strcasecmp(data->content_type_params[0].name, "charset") != 0 ||
	     strcasecmp(data->content_type_params[0].value,
			MESSAGE_PART_DEFAULT_CHARSET) != 0))
		return FALSE;

	if (data->content_id != NULL ||
	    data->content_description != NULL)
		return FALSE;

	if (data->content_transfer_encoding != NULL &&
	    strcasecmp(data->content_transfer_encoding, "7bit") != 0)
		return FALSE;

	/* BODYSTRUCTURE checks: */
	if (data->content_md5 != NULL ||
	    data->content_disposition != NULL ||
	    data->content_language != NULL ||
	    data->content_location != NULL)
		return FALSE;

	return TRUE;
}

/*
 * Header parsing
 */

/* Message part envelope */

/* Identifies which envelope header a header line is. The values are listed
   in the same order as the names in message_part_envelope_headers[]. */
enum envelope_field {
	ENVELOPE_FIELD_DATE = 0,
	ENVELOPE_FIELD_SUBJECT,
	ENVELOPE_FIELD_FROM,
	ENVELOPE_FIELD_SENDER,
	ENVELOPE_FIELD_REPLY_TO,
	ENVELOPE_FIELD_TO,
	ENVELOPE_FIELD_CC,
	ENVELOPE_FIELD_BCC,
	ENVELOPE_FIELD_IN_REPLY_TO,
	ENVELOPE_FIELD_MESSAGE_ID,

	/* returned by envelope_get_field() for any other header name */
	ENVELOPE_FIELD_UNKNOWN
};

static enum envelope_field
envelope_get_field(const char *name)
{
	switch (*name) {
	case 'B':
	case 'b':
		if (strcasecmp(name, "Bcc") == 0)
			return ENVELOPE_FIELD_BCC;
		break;
	case 'C':
	case 'c':
		if (strcasecmp(name, "Cc") == 0)
			return ENVELOPE_FIELD_CC;
		break;
	case 'D':
	case 'd':
		if (strcasecmp(name, "Date") == 0)
			return ENVELOPE_FIELD_DATE;
		break;
	case 'F':
	case 'f':
		if (strcasecmp(name, "From") == 0)
			return ENVELOPE_FIELD_FROM;
		break;
	case 'I':
	case 'i':
		if (strcasecmp(name, "In-reply-to") == 0)
			return ENVELOPE_FIELD_IN_REPLY_TO;
		break;
	case 'M':
	case 'm':
		if (strcasecmp(name, "Message-id") == 0)
			return ENVELOPE_FIELD_MESSAGE_ID;
		break;
	case 'R':
	case 'r':
		if (strcasecmp(name, "Reply-to") == 0)
			return ENVELOPE_FIELD_REPLY_TO;
		break;
	case 'S':
	case 's':
		if (strcasecmp(name, "Subject") == 0)
			return ENVELOPE_FIELD_SUBJECT;
		if (strcasecmp(name, "Sender") == 0)
			return ENVELOPE_FIELD_SENDER;
		break;
	case 'T':
	case 't':
		if (strcasecmp(name, "To") == 0)
			return ENVELOPE_FIELD_TO;
		break;
	}

	return ENVELOPE_FIELD_UNKNOWN;
}

/* Store the value of one envelope header line into *data.

   *data is allocated from pool if it is still NULL, even when hdr is NULL
   or the header is not an envelope header. For a header line that
   continues on further lines, only hdr->use_full_value is set and nothing
   is stored yet. Address headers are stored parsed with
   message_address_parse(); the other headers are stored as a copy of the
   full value. */
void message_part_envelope_parse_from_header(pool_t pool,
	struct message_part_envelope_data **data,
	struct message_header_line *hdr)
{
	struct message_part_envelope_data *env;
	struct message_address **addr_dest = NULL;
	const char **text_dest = NULL;
	enum envelope_field field;

	if (*data == NULL)
		*data = p_new(pool, struct message_part_envelope_data, 1);
	env = *data;

	if (hdr == NULL)
		return;

	field = envelope_get_field(hdr->name);
	if (field == ENVELOPE_FIELD_UNKNOWN)
		return;

	if (hdr->continues) {
		/* wait for full value */
		hdr->use_full_value = TRUE;
		return;
	}

	/* pick the destination; exactly one of text_dest/addr_dest is set */
	switch (field) {
	/* headers kept as plain strings */
	case ENVELOPE_FIELD_DATE:
		text_dest = &env->date;
		break;
	case ENVELOPE_FIELD_SUBJECT:
		text_dest = &env->subject;
		break;
	case ENVELOPE_FIELD_IN_REPLY_TO:
		text_dest = &env->in_reply_to;
		break;
	case ENVELOPE_FIELD_MESSAGE_ID:
		text_dest = &env->message_id;
		break;

	/* headers kept as parsed address lists */
	case ENVELOPE_FIELD_FROM:
		addr_dest = &env->from;
		break;
	case ENVELOPE_FIELD_SENDER:
		addr_dest = &env->sender;
		break;
	case ENVELOPE_FIELD_REPLY_TO:
		addr_dest = &env->reply_to;
		break;
	case ENVELOPE_FIELD_TO:
		addr_dest = &env->to;
		break;
	case ENVELOPE_FIELD_CC:
		addr_dest = &env->cc;
		break;
	case ENVELOPE_FIELD_BCC:
		addr_dest = &env->bcc;
		break;

	case ENVELOPE_FIELD_UNKNOWN:
		i_unreached();
	}

	if (text_dest != NULL) {
		*text_dest = p_strndup(pool,
			hdr->full_value, hdr->full_value_len);
	} else if (addr_dest != NULL) {
		*addr_dest = message_address_parse(pool, hdr->full_value,
						   hdr->full_value_len,
						   UINT_MAX, TRUE);
	}
}

/* Message part data */

/* Parse the parameters remaining in parser with rfc2231_parse() and copy
   them into a pool-allocated array of name/value pairs.

   *params_count_r is always set. *params_r is written only when at least
   one parameter was found. */
static void
parse_mime_parameters(struct rfc822_parser_context *parser,
	pool_t pool, const struct message_part_param **params_r,
	unsigned int *params_count_r)
{
	const char *const *pair;
	struct message_part_param *list;
	unsigned int count, idx;

	rfc2231_parse(parser, &pair);

	/* the result is a flat array: name, value, name, value, ... */
	count = str_array_length(pair);
	i_assert((count % 2) == 0);
	count /= 2;

	*params_count_r = count;
	if (count == 0)
		return;

	list = p_new(pool, struct message_part_param, count);
	for (idx = 0; idx < count; idx++, pair += 2) {
		list[idx].name = p_strdup(pool, pair[0]);
		list[idx].value = p_strdup(pool, pair[1]);
	}
	*params_r = list;
}

/* Parse a Content-Type header line into data->content_type,
   data->content_subtype and the content type parameters.

   The parsed value is split at its first '/'. Without a '/' the whole
   value becomes the content type and the subtype is left untouched. */
static void
parse_content_type(struct message_part_data *data,
	pool_t pool, struct message_header_line *hdr)
{
	struct rfc822_parser_context parser;
	string_t *content_type;
	const char *value, *slash;
	int ret;

	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
	rfc822_skip_lwsp(&parser);

	content_type = t_str_new(256);
	ret = rfc822_parse_content_type(&parser, content_type);

	/* Save content type and subtype */
	value = str_c(content_type);
	slash = strchr(value, '/');
	if (slash != NULL) {
		data->content_subtype = p_strdup(pool, slash + 1);
		str_truncate(content_type, (size_t)(slash - value));
	}
	data->content_type = p_strdup(pool, str_c(content_type));

	if (ret >= 0) {
		parse_mime_parameters(&parser, pool,
			&data->content_type_params,
			&data->content_type_params_count);
	}
	/* else: Content-Type is broken, but we wanted to get it as well as
	   we could. Don't try to read the parameters anymore though.

	   We don't completely ignore a broken Content-Type, because
	   then it would be written as text/plain. This would cause a
	   mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */
}

/* Parse a Content-Transfer-Encoding header line. The encoding is stored in
   data->content_transfer_encoding only if the value is a single non-empty
   MIME token and rfc822_skip_lwsp() returns 0 after it. */
static void
parse_content_transfer_encoding(struct message_part_data *data,
	pool_t pool, struct message_header_line *hdr)
{
	struct rfc822_parser_context parser;
	string_t *encoding;

	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
	rfc822_skip_lwsp(&parser);

	encoding = t_str_new(256);
	if (rfc822_parse_mime_token(&parser, encoding) < 0)
		return;
	if (rfc822_skip_lwsp(&parser) != 0)
		return;
	if (str_len(encoding) == 0)
		return;

	data->content_transfer_encoding = p_strdup(pool, str_c(encoding));
}

/* Parse a Content-Disposition header line: the leading MIME token goes to
   data->content_disposition and whatever follows it is parsed as the
   disposition parameters. Nothing is stored if the token can't be
   parsed. */
static void
parse_content_disposition(struct message_part_data *data,
	pool_t pool, struct message_header_line *hdr)
{
	struct rfc822_parser_context parser;
	string_t *disposition;

	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
	rfc822_skip_lwsp(&parser);

	disposition = t_str_new(256);
	if (rfc822_parse_mime_token(&parser, disposition) >= 0) {
		data->content_disposition =
			p_strdup(pool, str_c(disposition));

		parse_mime_parameters(&parser, pool,
			&data->content_disposition_params,
			&data->content_disposition_params_count);
	}
}

/* Parse a Content-Language header value into a NULL-terminated,
   pool-allocated array of language tags in data->content_language.
   data->content_language is left untouched if no tag could be parsed. */
static void
parse_content_language(struct message_part_data *data,
	pool_t pool, const unsigned char *value, size_t value_len)
{
	struct rfc822_parser_context parser;
	ARRAY_TYPE(const_string) tags;
	string_t *atom;

	/* Language-Header = "Content-Language" ":" 1#Language-tag
	   Language-Tag = Primary-tag *( "-" Subtag )
	   Primary-tag = 1*8ALPHA
	   Subtag = 1*8ALPHA */

	rfc822_parser_init(&parser, value, value_len, NULL);

	t_array_init(&tags, 16);
	atom = t_str_new(128);

	rfc822_skip_lwsp(&parser);
	for (;;) {
		const char *tag;

		if (rfc822_parse_atom(&parser, atom) < 0)
			break;

		tag = p_strdup(pool, str_c(atom));
		array_append(&tags, &tag, 1);
		str_truncate(atom, 0);

		/* another tag may follow only after a ',' */
		if (parser.data == parser.end || *parser.data != ',')
			break;
		parser.data++;
		rfc822_skip_lwsp(&parser);
	}

	if (array_count(&tags) == 0)
		return;

	array_append_zero(&tags);
	data->content_language = p_strarray_dup(pool, array_idx(&tags, 0));
}

/* Parse one Content-* header line into data.

   hdr->name must begin with "Content-" (the caller checks this); only the
   part after that prefix is examined. For a header line that continues on
   further lines, only hdr->use_full_value is set and nothing is stored
   yet.

   Each field is stored at most once: if the same header occurs again after
   its field was already set, the later occurrence is ignored. */
static void
parse_content_header(struct message_part_data *data,
	pool_t pool, struct message_header_line *hdr)
{
	const char *name = hdr->name + strlen("Content-");
	const char *value;

	if (hdr->continues) {
		hdr->use_full_value = TRUE;
		return;
	}

	/* NUL-terminated copy for the fields that are stored verbatim */
	value = t_strndup(hdr->full_value, hdr->full_value_len);

	switch (*name) {
	case 'i':
	case 'I':
		if (strcasecmp(name, "ID") == 0 && data->content_id == NULL)
			data->content_id = p_strdup(pool, value);
		break;

	case 'm':
	case 'M':
		if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL)
			data->content_md5 = p_strdup(pool, value);
		break;

	case 't':
	case 'T':
		if (strcasecmp(name, "Type") == 0 && data->content_type == NULL)
			parse_content_type(data, pool, hdr);
		else if (strcasecmp(name, "Transfer-Encoding") == 0 &&
			 data->content_transfer_encoding == NULL)
			parse_content_transfer_encoding(data, pool, hdr);
		break;

	case 'l':
	case 'L':
		if (strcasecmp(name, "Language") == 0 &&
		    data->content_language == NULL) {
			parse_content_language(data, pool,
				hdr->full_value, hdr->full_value_len);
		} else if (strcasecmp(name, "Location") == 0 &&
			   data->content_location == NULL) {
			data->content_location = p_strdup(pool, value);
		}
		break;

	case 'd':
	case 'D':
		if (strcasecmp(name, "Description") == 0 &&
		    data->content_description == NULL)
			data->content_description = p_strdup(pool, value);
		else if (strcasecmp(name, "Disposition") == 0 &&
			 data->content_disposition == NULL) {
			/* check the disposition itself rather than its
			   parameters: a disposition without parameters must
			   not be overwritten by a later duplicate header */
			parse_content_disposition(data, pool, hdr);
		}
		break;
	}
}

/* Update part->data from one header line of the part.

   A NULL hdr finalizes the part: an empty data structure is allocated if
   none exists yet; otherwise, if the part doesn't have
   MESSAGE_PART_FLAG_IS_MIME set, everything in the data except the
   envelope is cleared.

   For a non-NULL hdr, Content-* headers are parsed into the part data.
   If the parent part has MESSAGE_PART_FLAG_MESSAGE_RFC822 set, the header
   is also given to the envelope parser. All other headers are ignored. */
void message_part_data_parse_from_header(pool_t pool,
	struct message_part *part,
	struct message_header_line *hdr)
{
	struct message_part_data *part_data;
	struct message_part_envelope_data *saved_envelope;
	bool in_rfc822, is_content_hdr;

	if (hdr == NULL) {
		if (part->data == NULL) {
			/* no Content-* headers. add an empty context
			   structure anyway. */
			part->data = p_new(pool, struct message_part_data, 1);
			return;
		}
		if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0)
			return;

		/* If there was no Mime-Version, forget all
		   the Content-stuff */
		part_data = part->data;
		saved_envelope = part_data->envelope;
		i_zero(part_data);
		part_data->envelope = saved_envelope;
		return;
	}

	if (hdr->eoh)
		return;

	in_rfc822 = part->parent != NULL &&
		(part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0;
	is_content_hdr = strncasecmp(hdr->name, "Content-", 8) == 0;
	if (!in_rfc822 && !is_content_hdr)
		return;

	/* initialize message part data on first use */
	if (part->data == NULL)
		part->data = p_new(pool, struct message_part_data, 1);
	part_data = part->data;

	if (is_content_hdr) {
		T_BEGIN {
			parse_content_header(part_data, pool, hdr);
		} T_END;
	}

	if (in_rfc822) {
		/* message/rfc822, we need the envelope */
		message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr);
	}
}