# HG changeset patch # User Stephan Bosch # Date 1477245933 -7200 # Node ID 3a88d30ad0008716b0b78c682296d231bca16cf9 # Parent eff89c5ea7387f086822f71e3041336d514b9e25 lib-imap: imap-bodystructure: Moved message_part_data header parsing and query functions to their own module in lib-mail. Moved code from imap-bodystructure and imap-envelope. diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-imap/imap-bodystructure.c --- a/src/lib-imap/imap-bodystructure.c Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-imap/imap-bodystructure.c Sun Oct 23 20:05:33 2016 +0200 @@ -4,6 +4,7 @@ #include "buffer.h" #include "istream.h" #include "str.h" +#include "message-part-data.h" #include "message-parser.h" #include "rfc822-parser.h" #include "rfc2231-parser.h" @@ -12,268 +13,8 @@ #include "imap-envelope.h" #include "imap-bodystructure.h" -#define DEFAULT_CHARSET "us-ascii" - -#define EMPTY_BODYSTRUCTURE \ - "(\"text\" \"plain\" (\"charset\" \""DEFAULT_CHARSET"\") NIL NIL \"7bit\" 0 0)" - -static void -parse_mime_parameters(struct rfc822_parser_context *parser, - pool_t pool, const struct message_part_param **params_r, - unsigned int *params_count_r) -{ - const char *const *results; - struct message_part_param *params; - unsigned int params_count, i; - - rfc2231_parse(parser, &results); - - params_count = str_array_length(results); - i_assert((params_count % 2) == 0); - params_count /= 2; - - if (params_count > 0) { - params = p_new(pool, struct message_part_param, params_count); - for (i = 0; i < params_count; i++) { - params[i].name = p_strdup(pool, results[i*2+0]); - params[i].value = p_strdup(pool, results[i*2+1]); - } - *params_r = params; - } - - *params_count_r = params_count; -} - -static void -parse_content_type(struct message_part_data *data, - pool_t pool, struct message_header_line *hdr) -{ - struct rfc822_parser_context parser; - string_t *str; - const char *value; - unsigned int i; - int ret; - - rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); - 
rfc822_skip_lwsp(&parser); - - str = t_str_new(256); - ret = rfc822_parse_content_type(&parser, str); - - /* Save content type and subtype */ - value = str_c(str); - for (i = 0; value[i] != '\0'; i++) { - if (value[i] == '/') { - data->content_subtype = p_strdup(pool, value + i+1); - break; - } - } - str_truncate(str, i); - data->content_type = p_strdup(pool, str_c(str)); - - if (ret < 0) { - /* Content-Type is broken, but we wanted to get it as well as - we could. Don't try to read the parameters anymore though. - - We don't completely ignore a broken Content-Type, because - then it would be written as text/plain. This would cause a - mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */ - return; - } - - parse_mime_parameters(&parser, pool, - &data->content_type_params, - &data->content_type_params_count); -} - -static void -parse_content_transfer_encoding(struct message_part_data *data, - pool_t pool, struct message_header_line *hdr) -{ - struct rfc822_parser_context parser; - string_t *str; - - rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); - rfc822_skip_lwsp(&parser); - - str = t_str_new(256); - if (rfc822_parse_mime_token(&parser, str) >= 0 && - rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) { - data->content_transfer_encoding = - p_strdup(pool, str_c(str)); - } -} - -static void -parse_content_disposition(struct message_part_data *data, - pool_t pool, struct message_header_line *hdr) -{ - struct rfc822_parser_context parser; - string_t *str; - - rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); - rfc822_skip_lwsp(&parser); - - str = t_str_new(256); - if (rfc822_parse_mime_token(&parser, str) < 0) - return; - data->content_disposition = p_strdup(pool, str_c(str)); - - parse_mime_parameters(&parser, pool, - &data->content_disposition_params, - &data->content_disposition_params_count); -} - -static void -parse_content_language(struct message_part_data *data, - pool_t pool, const unsigned char *value, 
size_t value_len) -{ - struct rfc822_parser_context parser; - ARRAY_TYPE(const_string) langs; - string_t *str; - - /* Language-Header = "Content-Language" ":" 1#Language-tag - Language-Tag = Primary-tag *( "-" Subtag ) - Primary-tag = 1*8ALPHA - Subtag = 1*8ALPHA */ - - rfc822_parser_init(&parser, value, value_len, NULL); - - t_array_init(&langs, 16); - str = t_str_new(128); - - rfc822_skip_lwsp(&parser); - while (rfc822_parse_atom(&parser, str) >= 0) { - const char *lang = p_strdup(pool, str_c(str)); - - array_append(&langs, &lang, 1); - str_truncate(str, 0); - - if (parser.data == parser.end || *parser.data != ',') - break; - parser.data++; - rfc822_skip_lwsp(&parser); - } - - if (array_count(&langs) > 0) { - array_append_zero(&langs); - data->content_language = - p_strarray_dup(pool, array_idx(&langs, 0)); - } -} - -static void -parse_content_header(struct message_part_data *data, - pool_t pool, struct message_header_line *hdr) -{ - const char *name = hdr->name + strlen("Content-"); - const char *value; - - if (hdr->continues) { - hdr->use_full_value = TRUE; - return; - } - - value = t_strndup(hdr->full_value, hdr->full_value_len); - - switch (*name) { - case 'i': - case 'I': - if (strcasecmp(name, "ID") == 0 && data->content_id == NULL) - data->content_id = p_strdup(pool, value); - break; - - case 'm': - case 'M': - if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL) - data->content_md5 = p_strdup(pool, value); - break; - - case 't': - case 'T': - if (strcasecmp(name, "Type") == 0 && data->content_type == NULL) - parse_content_type(data, pool, hdr); - else if (strcasecmp(name, "Transfer-Encoding") == 0 && - data->content_transfer_encoding == NULL) - parse_content_transfer_encoding(data, pool, hdr); - break; - - case 'l': - case 'L': - if (strcasecmp(name, "Language") == 0 && - data->content_language == NULL) { - parse_content_language(data, pool, - hdr->full_value, hdr->full_value_len); - } else if (strcasecmp(name, "Location") == 0 && - 
data->content_location == NULL) { - data->content_location = p_strdup(pool, value); - } - break; - - case 'd': - case 'D': - if (strcasecmp(name, "Description") == 0 && - data->content_description == NULL) - data->content_description = p_strdup(pool, value); - else if (strcasecmp(name, "Disposition") == 0 && - data->content_disposition_params == NULL) - parse_content_disposition(data, pool, hdr); - break; - } -} - -void message_part_data_parse_from_header(pool_t pool, - struct message_part *part, - struct message_header_line *hdr) -{ - struct message_part_data *part_data; - struct message_part_envelope_data *envelope; - bool parent_rfc822; - - if (hdr == NULL) { - if (part->data == NULL) { - /* no Content-* headers. add an empty context - structure anyway. */ - part->data = part_data = - p_new(pool, struct message_part_data, 1); - } else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { - /* If there was no Mime-Version, forget all - the Content-stuff */ - part_data = part->data; - envelope = part_data->envelope; - - i_zero(part_data); - part_data->envelope = envelope; - } - return; - } - - if (hdr->eoh) - return; - - parent_rfc822 = part->parent != NULL && - (part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0; - if (!parent_rfc822 && strncasecmp(hdr->name, "Content-", 8) != 0) - return; - - if (part->data == NULL) { - /* initialize message part data */ - part->data = part_data = - p_new(pool, struct message_part_data, 1); - } - part_data = part->data; - - if (strncasecmp(hdr->name, "Content-", 8) == 0) { - T_BEGIN { - parse_content_header(part_data, pool, hdr); - } T_END; - } - - if (parent_rfc822) { - /* message/rfc822, we need the envelope */ - message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr); - } -} +#define EMPTY_BODYSTRUCTURE "(\"text\" \"plain\" " \ + "(\"charset\" \""MESSAGE_PART_DEFAULT_CHARSET"\") NIL NIL \"7bit\" 0 0)" static void params_write(const struct message_part_param *params, @@ -303,7 +44,8 @@ if 
(default_charset && !seen_charset) { if (i > 0) str_append_c(str, ' '); - str_append(str, "\"charset\" \""DEFAULT_CHARSET"\""); + str_append(str, "\"charset\" " + "\""MESSAGE_PART_DEFAULT_CHARSET"\""); } str_append_c(str, ')'); } @@ -473,48 +215,6 @@ part_write_bodystructure_common(data, str); } -bool message_part_data_is_plain_7bit(const struct message_part *part) -{ - const struct message_part_data *data = part->data; - - i_assert(part->parent == NULL); - - /* if content-type is text/xxx we don't have to check any - multipart stuff */ - if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0) - return FALSE; - if (part->next != NULL || part->children != NULL) - return FALSE; /* shouldn't happen normally.. */ - - /* must be text/plain */ - if (data->content_subtype != NULL && - strcasecmp(data->content_subtype, "plain") != 0) - return FALSE; - - /* only allowed parameter is charset=us-ascii, which is also default */ - if (data->content_type_params_count > 0 && - (strcasecmp(data->content_type_params[0].name, "charset") != 0 || - strcasecmp(data->content_type_params[0].value, DEFAULT_CHARSET) != 0)) - return FALSE; - - if (data->content_id != NULL || - data->content_description != NULL) - return FALSE; - - if (data->content_transfer_encoding != NULL && - strcasecmp(data->content_transfer_encoding, "7bit") != 0) - return FALSE; - - /* BODYSTRUCTURE checks: */ - if (data->content_md5 != NULL || - data->content_disposition != NULL || - data->content_language != NULL || - data->content_location != NULL) - return FALSE; - - return TRUE; -} - void imap_bodystructure_write(const struct message_part *part, string_t *dest, bool extended) { diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-imap/imap-bodystructure.h --- a/src/lib-imap/imap-bodystructure.h Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-imap/imap-bodystructure.h Sun Oct 23 20:05:33 2016 +0200 @@ -1,42 +1,9 @@ #ifndef IMAP_BODYSTRUCTURE_H #define IMAP_BODYSTRUCTURE_H -struct message_part_param { - const char *name; - const 
char *value; -}; - -struct message_part_data { - const char *content_type, *content_subtype; - const struct message_part_param *content_type_params; - unsigned int content_type_params_count; - - const char *content_transfer_encoding; - const char *content_id; - const char *content_description; - const char *content_disposition; - const struct message_part_param *content_disposition_params; - unsigned int content_disposition_params_count; - const char *content_md5; - const char *const *content_language; - const char *content_location; - - struct message_part_envelope_data *envelope; -}; - struct message_part; struct message_header_line; -/* Parse a single header. Note that this modifies part->data. */ -void message_part_data_parse_from_header(pool_t pool, - struct message_part *part, - struct message_header_line *hdr); - -/* Returns TRUE if this message part has content-type "text/plain", - chaset "us-ascii" and content-tranfer-encoding "7bit" */ -bool message_part_data_is_plain_7bit(const struct message_part *part) - ATTR_PURE; - /* Write a BODY/BODYSTRUCTURE from given message_part. The message_part->data field must be set. part->body_size.virtual_size and .lines are also used for writing it. 
*/ diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-imap/imap-envelope.c --- a/src/lib-imap/imap-envelope.c Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-imap/imap-envelope.c Sun Oct 23 20:05:33 2016 +0200 @@ -4,169 +4,18 @@ #include "istream.h" #include "str.h" #include "message-address.h" +#include "message-part-data.h" #include "message-parser.h" #include "imap-parser.h" #include "imap-envelope.h" #include "imap-quote.h" -struct message_part_envelope_data { - const char *date, *subject; - struct message_address *from, *sender, *reply_to; - struct message_address *to, *cc, *bcc; - - const char *in_reply_to, *message_id; -}; - -enum envelope_field { - ENVELOPE_FIELD_DATE = 0, - ENVELOPE_FIELD_SUBJECT, - ENVELOPE_FIELD_FROM, - ENVELOPE_FIELD_SENDER, - ENVELOPE_FIELD_REPLY_TO, - ENVELOPE_FIELD_TO, - ENVELOPE_FIELD_CC, - ENVELOPE_FIELD_BCC, - ENVELOPE_FIELD_IN_REPLY_TO, - ENVELOPE_FIELD_MESSAGE_ID, - - ENVELOPE_FIELD_UNKNOWN -}; - const char *imap_envelope_headers[] = { "Date", "Subject", "From", "Sender", "Reply-To", "To", "Cc", "Bcc", "In-Reply-To", "Message-ID", NULL }; -static enum envelope_field -envelope_get_field(const char *name) -{ - switch (*name) { - case 'B': - case 'b': - if (strcasecmp(name, "Bcc") == 0) - return ENVELOPE_FIELD_BCC; - break; - case 'C': - case 'c': - if (strcasecmp(name, "Cc") == 0) - return ENVELOPE_FIELD_CC; - break; - case 'D': - case 'd': - if (strcasecmp(name, "Date") == 0) - return ENVELOPE_FIELD_DATE; - break; - case 'F': - case 'f': - if (strcasecmp(name, "From") == 0) - return ENVELOPE_FIELD_FROM; - break; - case 'I': - case 'i': - if (strcasecmp(name, "In-reply-to") == 0) - return ENVELOPE_FIELD_IN_REPLY_TO; - break; - case 'M': - case 'm': - if (strcasecmp(name, "Message-id") == 0) - return ENVELOPE_FIELD_MESSAGE_ID; - break; - case 'R': - case 'r': - if (strcasecmp(name, "Reply-to") == 0) - return ENVELOPE_FIELD_REPLY_TO; - break; - case 'S': - case 's': - if (strcasecmp(name, "Subject") == 0) - return ENVELOPE_FIELD_SUBJECT; - 
if (strcasecmp(name, "Sender") == 0) - return ENVELOPE_FIELD_SENDER; - break; - case 'T': - case 't': - if (strcasecmp(name, "To") == 0) - return ENVELOPE_FIELD_TO; - break; - } - - return ENVELOPE_FIELD_UNKNOWN; -} - -void message_part_envelope_parse_from_header(pool_t pool, - struct message_part_envelope_data **data, - struct message_header_line *hdr) -{ - struct message_part_envelope_data *d; - enum envelope_field field; - struct message_address **addr_p; - const char **str_p; - - if (*data == NULL) { - *data = p_new(pool, struct message_part_envelope_data, 1); - } - - if (hdr == NULL) - return; - field = envelope_get_field(hdr->name); - if (field == ENVELOPE_FIELD_UNKNOWN) - return; - - if (hdr->continues) { - /* wait for full value */ - hdr->use_full_value = TRUE; - return; - } - - d = *data; - addr_p = NULL; str_p = NULL; - switch (field) { - case ENVELOPE_FIELD_DATE: - str_p = &d->date; - break; - case ENVELOPE_FIELD_SUBJECT: - str_p = &d->subject; - break; - case ENVELOPE_FIELD_MESSAGE_ID: - str_p = &d->message_id; - break; - case ENVELOPE_FIELD_IN_REPLY_TO: - str_p = &d->in_reply_to; - break; - - case ENVELOPE_FIELD_CC: - addr_p = &d->cc; - break; - case ENVELOPE_FIELD_BCC: - addr_p = &d->bcc; - break; - case ENVELOPE_FIELD_FROM: - addr_p = &d->from; - break; - case ENVELOPE_FIELD_SENDER: - addr_p = &d->sender; - break; - case ENVELOPE_FIELD_TO: - addr_p = &d->to; - break; - case ENVELOPE_FIELD_REPLY_TO: - addr_p = &d->reply_to; - break; - case ENVELOPE_FIELD_UNKNOWN: - i_unreached(); - } - - if (addr_p != NULL) { - *addr_p = message_address_parse(pool, hdr->full_value, - hdr->full_value_len, - UINT_MAX, TRUE); - } else if (str_p != NULL) { - *str_p = p_strndup(pool, - hdr->full_value, hdr->full_value_len); - } -} - static void imap_write_address(string_t *str, struct message_address *addr) { if (addr == NULL) { diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-imap/imap-envelope.h --- a/src/lib-imap/imap-envelope.h Sun Oct 23 19:47:12 2016 +0200 +++ 
b/src/lib-imap/imap-envelope.h Sun Oct 23 20:05:33 2016 +0200 @@ -2,16 +2,10 @@ #define IMAP_ENVELOPE_H struct imap_arg; -struct message_header_line; struct message_part_envelope_data; extern const char *imap_envelope_headers[]; -/* Update envelope data based from given header field */ -void message_part_envelope_parse_from_header(pool_t pool, - struct message_part_envelope_data **_data, - struct message_header_line *hdr); - /* Write envelope to given string */ void imap_envelope_write_part_data(struct message_part_envelope_data *data, string_t *str); diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-imap/test-imap-bodystructure.c --- a/src/lib-imap/test-imap-bodystructure.c Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-imap/test-imap-bodystructure.c Sun Oct 23 20:05:33 2016 +0200 @@ -3,6 +3,7 @@ #include "lib.h" #include "istream.h" #include "str.h" +#include "message-part-data.h" #include "message-parser.h" #include "imap-bodystructure.h" #include "test-common.h" diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-mail/Makefile.am --- a/src/lib-mail/Makefile.am Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-mail/Makefile.am Sun Oct 23 20:05:33 2016 +0200 @@ -27,6 +27,7 @@ message-id.c \ message-parser.c \ message-part.c \ + message-part-data.c \ message-part-serialize.c \ message-search.c \ message-size.c \ @@ -63,6 +64,7 @@ message-id.h \ message-parser.h \ message-part.h \ + message-part-data.h \ message-part-serialize.h \ message-search.h \ message-size.h \ diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-mail/message-part-data.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-mail/message-part-data.c Sun Oct 23 20:05:33 2016 +0200 @@ -0,0 +1,468 @@ +/* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "array.h" +#include "rfc822-parser.h" +#include "rfc2231-parser.h" +#include "message-address.h" +#include "message-header-parser.h" + +#include "message-part-data.h" + +/* + * + */ + +bool 
message_part_data_is_plain_7bit(const struct message_part *part) +{ + const struct message_part_data *data = part->data; + + i_assert(part->parent == NULL); + + /* if content-type is text/xxx we don't have to check any + multipart stuff */ + if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0) + return FALSE; + if (part->next != NULL || part->children != NULL) + return FALSE; /* shouldn't happen normally.. */ + + /* must be text/plain */ + if (data->content_subtype != NULL && + strcasecmp(data->content_subtype, "plain") != 0) + return FALSE; + + /* only allowed parameter is charset=us-ascii, which is also default */ + if (data->content_type_params_count > 0 && + (strcasecmp(data->content_type_params[0].name, "charset") != 0 || + strcasecmp(data->content_type_params[0].value, + MESSAGE_PART_DEFAULT_CHARSET) != 0)) + return FALSE; + + if (data->content_id != NULL || + data->content_description != NULL) + return FALSE; + + if (data->content_transfer_encoding != NULL && + strcasecmp(data->content_transfer_encoding, "7bit") != 0) + return FALSE; + + /* BODYSTRUCTURE checks: */ + if (data->content_md5 != NULL || + data->content_disposition != NULL || + data->content_language != NULL || + data->content_location != NULL) + return FALSE; + + return TRUE; +} + +/* + * Header parsing + */ + +/* Message part envelope */ + +enum envelope_field { + ENVELOPE_FIELD_DATE = 0, + ENVELOPE_FIELD_SUBJECT, + ENVELOPE_FIELD_FROM, + ENVELOPE_FIELD_SENDER, + ENVELOPE_FIELD_REPLY_TO, + ENVELOPE_FIELD_TO, + ENVELOPE_FIELD_CC, + ENVELOPE_FIELD_BCC, + ENVELOPE_FIELD_IN_REPLY_TO, + ENVELOPE_FIELD_MESSAGE_ID, + + ENVELOPE_FIELD_UNKNOWN +}; + +static enum envelope_field +envelope_get_field(const char *name) +{ + switch (*name) { + case 'B': + case 'b': + if (strcasecmp(name, "Bcc") == 0) + return ENVELOPE_FIELD_BCC; + break; + case 'C': + case 'c': + if (strcasecmp(name, "Cc") == 0) + return ENVELOPE_FIELD_CC; + break; + case 'D': + case 'd': + if (strcasecmp(name, "Date") == 0) + return 
ENVELOPE_FIELD_DATE; + break; + case 'F': + case 'f': + if (strcasecmp(name, "From") == 0) + return ENVELOPE_FIELD_FROM; + break; + case 'I': + case 'i': + if (strcasecmp(name, "In-reply-to") == 0) + return ENVELOPE_FIELD_IN_REPLY_TO; + break; + case 'M': + case 'm': + if (strcasecmp(name, "Message-id") == 0) + return ENVELOPE_FIELD_MESSAGE_ID; + break; + case 'R': + case 'r': + if (strcasecmp(name, "Reply-to") == 0) + return ENVELOPE_FIELD_REPLY_TO; + break; + case 'S': + case 's': + if (strcasecmp(name, "Subject") == 0) + return ENVELOPE_FIELD_SUBJECT; + if (strcasecmp(name, "Sender") == 0) + return ENVELOPE_FIELD_SENDER; + break; + case 'T': + case 't': + if (strcasecmp(name, "To") == 0) + return ENVELOPE_FIELD_TO; + break; + } + + return ENVELOPE_FIELD_UNKNOWN; +} + +void message_part_envelope_parse_from_header(pool_t pool, + struct message_part_envelope_data **data, + struct message_header_line *hdr) +{ + struct message_part_envelope_data *d; + enum envelope_field field; + struct message_address **addr_p; + const char **str_p; + + if (*data == NULL) { + *data = p_new(pool, struct message_part_envelope_data, 1); + } + + if (hdr == NULL) + return; + field = envelope_get_field(hdr->name); + if (field == ENVELOPE_FIELD_UNKNOWN) + return; + + if (hdr->continues) { + /* wait for full value */ + hdr->use_full_value = TRUE; + return; + } + + d = *data; + addr_p = NULL; str_p = NULL; + switch (field) { + case ENVELOPE_FIELD_DATE: + str_p = &d->date; + break; + case ENVELOPE_FIELD_SUBJECT: + str_p = &d->subject; + break; + case ENVELOPE_FIELD_MESSAGE_ID: + str_p = &d->message_id; + break; + case ENVELOPE_FIELD_IN_REPLY_TO: + str_p = &d->in_reply_to; + break; + + case ENVELOPE_FIELD_CC: + addr_p = &d->cc; + break; + case ENVELOPE_FIELD_BCC: + addr_p = &d->bcc; + break; + case ENVELOPE_FIELD_FROM: + addr_p = &d->from; + break; + case ENVELOPE_FIELD_SENDER: + addr_p = &d->sender; + break; + case ENVELOPE_FIELD_TO: + addr_p = &d->to; + break; + case ENVELOPE_FIELD_REPLY_TO: 
+ addr_p = &d->reply_to; + break; + case ENVELOPE_FIELD_UNKNOWN: + i_unreached(); + } + + if (addr_p != NULL) { + *addr_p = message_address_parse(pool, hdr->full_value, + hdr->full_value_len, + UINT_MAX, TRUE); + } else if (str_p != NULL) { + *str_p = p_strndup(pool, + hdr->full_value, hdr->full_value_len); + } +} + +/* Message part data */ + +static void +parse_mime_parameters(struct rfc822_parser_context *parser, + pool_t pool, const struct message_part_param **params_r, + unsigned int *params_count_r) +{ + const char *const *results; + struct message_part_param *params; + unsigned int params_count, i; + + rfc2231_parse(parser, &results); + + params_count = str_array_length(results); + i_assert((params_count % 2) == 0); + params_count /= 2; + + if (params_count > 0) { + params = p_new(pool, struct message_part_param, params_count); + for (i = 0; i < params_count; i++) { + params[i].name = p_strdup(pool, results[i*2+0]); + params[i].value = p_strdup(pool, results[i*2+1]); + } + *params_r = params; + } + + *params_count_r = params_count; +} + +static void +parse_content_type(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + const char *value; + unsigned int i; + int ret; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + ret = rfc822_parse_content_type(&parser, str); + + /* Save content type and subtype */ + value = str_c(str); + for (i = 0; value[i] != '\0'; i++) { + if (value[i] == '/') { + data->content_subtype = p_strdup(pool, value + i+1); + break; + } + } + str_truncate(str, i); + data->content_type = p_strdup(pool, str_c(str)); + + if (ret < 0) { + /* Content-Type is broken, but we wanted to get it as well as + we could. Don't try to read the parameters anymore though. + + We don't completely ignore a broken Content-Type, because + then it would be written as text/plain. 
This would cause a + mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */ + return; + } + + parse_mime_parameters(&parser, pool, + &data->content_type_params, + &data->content_type_params_count); +} + +static void +parse_content_transfer_encoding(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) >= 0 && + rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) { + data->content_transfer_encoding = + p_strdup(pool, str_c(str)); + } +} + +static void +parse_content_disposition(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *str; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) < 0) + return; + data->content_disposition = p_strdup(pool, str_c(str)); + + parse_mime_parameters(&parser, pool, + &data->content_disposition_params, + &data->content_disposition_params_count); +} + +static void +parse_content_language(struct message_part_data *data, + pool_t pool, const unsigned char *value, size_t value_len) +{ + struct rfc822_parser_context parser; + ARRAY_TYPE(const_string) langs; + string_t *str; + + /* Language-Header = "Content-Language" ":" 1#Language-tag + Language-Tag = Primary-tag *( "-" Subtag ) + Primary-tag = 1*8ALPHA + Subtag = 1*8ALPHA */ + + rfc822_parser_init(&parser, value, value_len, NULL); + + t_array_init(&langs, 16); + str = t_str_new(128); + + rfc822_skip_lwsp(&parser); + while (rfc822_parse_atom(&parser, str) >= 0) { + const char *lang = p_strdup(pool, str_c(str)); + + array_append(&langs, &lang, 1); + str_truncate(str, 0); + + if (parser.data == parser.end || *parser.data != ',') 
+ break; + parser.data++; + rfc822_skip_lwsp(&parser); + } + + if (array_count(&langs) > 0) { + array_append_zero(&langs); + data->content_language = + p_strarray_dup(pool, array_idx(&langs, 0)); + } +} + +static void +parse_content_header(struct message_part_data *data, + pool_t pool, struct message_header_line *hdr) +{ + const char *name = hdr->name + strlen("Content-"); + const char *value; + + if (hdr->continues) { + hdr->use_full_value = TRUE; + return; + } + + value = t_strndup(hdr->full_value, hdr->full_value_len); + + switch (*name) { + case 'i': + case 'I': + if (strcasecmp(name, "ID") == 0 && data->content_id == NULL) + data->content_id = p_strdup(pool, value); + break; + + case 'm': + case 'M': + if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL) + data->content_md5 = p_strdup(pool, value); + break; + + case 't': + case 'T': + if (strcasecmp(name, "Type") == 0 && data->content_type == NULL) + parse_content_type(data, pool, hdr); + else if (strcasecmp(name, "Transfer-Encoding") == 0 && + data->content_transfer_encoding == NULL) + parse_content_transfer_encoding(data, pool, hdr); + break; + + case 'l': + case 'L': + if (strcasecmp(name, "Language") == 0 && + data->content_language == NULL) { + parse_content_language(data, pool, + hdr->full_value, hdr->full_value_len); + } else if (strcasecmp(name, "Location") == 0 && + data->content_location == NULL) { + data->content_location = p_strdup(pool, value); + } + break; + + case 'd': + case 'D': + if (strcasecmp(name, "Description") == 0 && + data->content_description == NULL) + data->content_description = p_strdup(pool, value); + else if (strcasecmp(name, "Disposition") == 0 && + data->content_disposition_params == NULL) + parse_content_disposition(data, pool, hdr); + break; + } +} + +void message_part_data_parse_from_header(pool_t pool, + struct message_part *part, + struct message_header_line *hdr) +{ + struct message_part_data *part_data; + struct message_part_envelope_data *envelope; + bool 
parent_rfc822; + + if (hdr == NULL) { + if (part->data == NULL) { + /* no Content-* headers. add an empty context + structure anyway. */ + part->data = part_data = + p_new(pool, struct message_part_data, 1); + } else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { + /* If there was no Mime-Version, forget all + the Content-stuff */ + part_data = part->data; + envelope = part_data->envelope; + + i_zero(part_data); + part_data->envelope = envelope; + } + return; + } + + if (hdr->eoh) + return; + + parent_rfc822 = part->parent != NULL && + (part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0; + if (!parent_rfc822 && strncasecmp(hdr->name, "Content-", 8) != 0) + return; + + if (part->data == NULL) { + /* initialize message part data */ + part->data = part_data = + p_new(pool, struct message_part_data, 1); + } + part_data = part->data; + + if (strncasecmp(hdr->name, "Content-", 8) == 0) { + T_BEGIN { + parse_content_header(part_data, pool, hdr); + } T_END; + } + + if (parent_rfc822) { + /* message/rfc822, we need the envelope */ + message_part_envelope_parse_from_header(pool, &part_data->envelope, hdr); + } +} \ No newline at end of file diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-mail/message-part-data.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-mail/message-part-data.h Sun Oct 23 20:05:33 2016 +0200 @@ -0,0 +1,64 @@ +#ifndef MESSAGE_PART_DATA_H +#define MESSAGE_PART_DATA_H + +#include "message-part.h" + +#define MESSAGE_PART_DEFAULT_CHARSET "us-ascii" + +struct message_header_line; + +struct message_part_param { + const char *name; + const char *value; +}; + +struct message_part_envelope_data { + const char *date, *subject; + struct message_address *from, *sender, *reply_to; + struct message_address *to, *cc, *bcc; + + const char *in_reply_to, *message_id; +}; + +struct message_part_data { + const char *content_type, *content_subtype; + const struct message_part_param *content_type_params; + unsigned int content_type_params_count; + + 
const char *content_transfer_encoding; + const char *content_id; + const char *content_description; + const char *content_disposition; + const struct message_part_param *content_disposition_params; + unsigned int content_disposition_params_count; + const char *content_md5; + const char *const *content_language; + const char *content_location; + + struct message_part_envelope_data *envelope; +}; + +/* + * + */ + +/* Returns TRUE if this message part has content-type "text/plain", + charset "us-ascii" and content-transfer-encoding "7bit" */ +bool message_part_data_is_plain_7bit(const struct message_part *part) + ATTR_PURE; + +/* + * Header parsing + */ + +/* Update envelope data based on given header field */ +void message_part_envelope_parse_from_header(pool_t pool, + struct message_part_envelope_data **_data, + struct message_header_line *hdr); + +/* Parse a single header. Note that this modifies part->data. */ +void message_part_data_parse_from_header(pool_t pool, + struct message_part *part, + struct message_header_line *hdr); + +#endif \ No newline at end of file diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-storage/index/index-mail-headers.c --- a/src/lib-storage/index/index-mail-headers.c Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-storage/index/index-mail-headers.c Sun Oct 23 20:05:33 2016 +0200 @@ -6,6 +6,7 @@ #include "buffer.h" #include "str.h" #include "message-date.h" +#include "message-part-data.h" #include "message-parser.h" #include "message-header-decode.h" #include "istream-tee.h" diff -r eff89c5ea738 -r 3a88d30ad000 src/lib-storage/index/index-mail.c --- a/src/lib-storage/index/index-mail.c Sun Oct 23 19:47:12 2016 +0200 +++ b/src/lib-storage/index/index-mail.c Sun Oct 23 20:05:33 2016 +0200 @@ -8,6 +8,7 @@ #include "hex-binary.h" #include "str.h" #include "message-date.h" +#include "message-part-data.h" #include "message-parser.h" #include "message-snippet.h"