Mercurial > dovecot > original-hg > dovecot-1.2
changeset 6117:9214044ce1f1 HEAD
Removed message-content-parser. Added rfc822_parse_content_type() and
rfc822_parse_content_param() in its place to help parse Content-Type style headers.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 20 Jul 2007 09:42:25 +0300 |
parents | 6bc2995619f3 |
children | 841b052e269e |
files | src/lib-imap/imap-bodystructure.c src/lib-mail/Makefile.am src/lib-mail/message-content-parser.c src/lib-mail/message-content-parser.h src/lib-mail/message-decoder.c src/lib-mail/message-parser.c src/lib-mail/message-search.c src/lib-mail/rfc822-parser.c src/lib-mail/rfc822-parser.h |
diffstat | 9 files changed, 271 insertions(+), 258 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-imap/imap-bodystructure.c Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-imap/imap-bodystructure.c Fri Jul 20 09:42:25 2007 +0300 @@ -5,7 +5,6 @@ #include "istream.h" #include "str.h" #include "message-parser.h" -#include "message-content-parser.h" #include "rfc822-parser.h" #include "imap-parser.h" #include "imap-quote.h" @@ -20,7 +19,6 @@ struct message_part_body_data { pool_t pool; - string_t *str; /* temporary */ const char *content_type, *content_subtype; const char *content_type_params; const char *content_transfer_encoding; @@ -32,65 +30,111 @@ const char *content_language; struct message_part_envelope_data *envelope; - - unsigned int charset_found:1; }; -static void parse_content_type(const unsigned char *value, size_t value_len, - void *context) +static void parse_content_type(struct message_part_body_data *data, + struct message_header_line *hdr) { - struct message_part_body_data *data = context; - size_t i; + struct rfc822_parser_context parser; + const char *key, *value; + string_t *str; + unsigned int i; + bool charset_found = FALSE; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); + + str = t_str_new(256); + if (rfc822_parse_content_type(&parser, str) < 0) + return; - for (i = 0; i < value_len; i++) { - if (value[i] == '/') + /* Save content type and subtype */ + value = str_c(str); + for (i = 0; value[i] != '\0'; i++) { + if (value[i] == '/') { + data->content_subtype = + imap_quote(data->pool, str_data(str) + i + 1, + str_len(str) - (i + 1)); break; + } + } + data->content_type = + imap_quote(data->pool, str_data(str), i); + + /* parse parameters and save them */ + str_truncate(str, 0); + while (rfc822_parse_content_param(&parser, &key, &value) > 0) { + if (strcasecmp(key, "charset") == 0) + charset_found = TRUE; + + str_append_c(str, ' '); + imap_quote_append_string(str, key, TRUE); + str_append_c(str, ' '); + imap_quote_append_string(str, value, TRUE); } - if (i == 
value_len) - data->content_type = imap_quote(data->pool, value, value_len); - else { - data->content_type = imap_quote(data->pool, value, i); - - i++; - data->content_subtype = - imap_quote(data->pool, value+i, value_len-i); + if (!charset_found && + strcasecmp(data->content_type, "\"text\"") == 0) { + /* set a default charset */ + str_append_c(str, ' '); + str_append(str, DEFAULT_CHARSET); + } + if (str_len(str) > 0) { + data->content_type_params = + p_strdup(data->pool, str_c(str) + 1); } } -static void parse_save_params_list(const unsigned char *name, size_t name_len, - const unsigned char *value, size_t value_len, - bool value_quoted __attr_unused__, - void *context) +static void parse_content_transfer_encoding(struct message_part_body_data *data, + struct message_header_line *hdr) { - struct message_part_body_data *data = context; + struct rfc822_parser_context parser; + string_t *str; - if (str_len(data->str) != 0) - str_append_c(data->str, ' '); + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); - if (name_len == 7 && memcasecmp(name, "charset", 7) == 0) - data->charset_found = TRUE; - - imap_quote_append(data->str, name, name_len, TRUE); - str_append_c(data->str, ' '); - imap_quote_append(data->str, value, value_len, TRUE); + t_push(); + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) >= 0) { + data->content_transfer_encoding = + imap_quote(data->pool, str_data(str), str_len(str)); + } + t_pop(); } -static void parse_content_transfer_encoding(const unsigned char *value, - size_t value_len, void *context) +static void parse_content_disposition(struct message_part_body_data *data, + struct message_header_line *hdr) { - struct message_part_body_data *data = context; + struct rfc822_parser_context parser; + const char *key, *value; + string_t *str; + + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); - 
data->content_transfer_encoding = - imap_quote(data->pool, value, value_len); -} + t_push(); + str = t_str_new(256); + if (rfc822_parse_mime_token(&parser, str) < 0) { + t_pop(); + return; + } + data->content_disposition = + imap_quote(data->pool, str_data(str), str_len(str)); -static void parse_content_disposition(const unsigned char *value, - size_t value_len, void *context) -{ - struct message_part_body_data *data = context; - - data->content_disposition = imap_quote(data->pool, value, value_len); + /* parse parameters and save them */ + str_truncate(str, 0); + while (rfc822_parse_content_param(&parser, &key, &value) > 0) { + str_append_c(str, ' '); + imap_quote_append_string(str, key, TRUE); + str_append_c(str, ' '); + imap_quote_append_string(str, value, TRUE); + } + if (str_len(str) > 0) { + data->content_disposition_params = + p_strdup(data->pool, str_c(str) + 1); + } + t_pop(); } static void parse_content_language(const unsigned char *value, size_t value_len, @@ -164,27 +208,13 @@ case 't': case 'T': if (strcasecmp(name, "Type") == 0 && d->content_type == NULL) { - d->str = str_new(default_pool, 256); - message_content_parse_header(value, value_len, - parse_content_type, - parse_save_params_list, d); - if (!d->charset_found && - strncasecmp(d->content_type, "\"text\"", 6) == 0) { - /* set a default charset */ - if (str_len(d->str) != 0) - str_append_c(d->str, ' '); - str_append(d->str, DEFAULT_CHARSET); - } - d->content_type_params = - p_strdup_empty(pool, str_c(d->str)); - str_free(&d->str); + t_push(); + parse_content_type(d, hdr); + t_pop(); } if (strcasecmp(name, "Transfer-Encoding") == 0 && - d->content_transfer_encoding == NULL) { - message_content_parse_header(value, value_len, - parse_content_transfer_encoding, - null_parse_content_param_callback, d); - } + d->content_transfer_encoding == NULL) + parse_content_transfer_encoding(d, hdr); break; case 'l': @@ -202,15 +232,8 @@ imap_quote(pool, value, value_len); } if (strcasecmp(name, "Disposition") == 
0 && - d->content_disposition_params == NULL) { - d->str = str_new(default_pool, 256); - message_content_parse_header(value, value_len, - parse_content_disposition, - parse_save_params_list, d); - d->content_disposition_params = - p_strdup_empty(pool, str_c(d->str)); - str_free(&d->str); - } + d->content_disposition_params == NULL) + parse_content_disposition(d, hdr); break; } }
--- a/src/lib-mail/Makefile.am Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/Makefile.am Fri Jul 20 09:42:25 2007 +0300 @@ -7,7 +7,6 @@ libmail_a_SOURCES = \ istream-header-filter.c \ message-address.c \ - message-content-parser.c \ message-date.c \ message-decoder.c \ message-header-decode.c \ @@ -25,7 +24,6 @@ istream-header-filter.h \ mail-types.h \ message-address.h \ - message-content-parser.h \ message-date.h \ message-decoder.h \ message-header-decode.h \
--- a/src/lib-mail/message-content-parser.c Fri Jul 20 08:28:26 2007 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -/* Copyright (C) 2002-2005 Timo Sirainen */ - -#include "lib.h" -#include "str.h" -#include "rfc822-parser.h" -#include "message-content-parser.h" - -parse_content_callback_t *null_parse_content_callback = NULL; -parse_content_param_callback_t *null_parse_content_param_callback = NULL; - -void message_content_parse_header(const unsigned char *data, size_t size, - parse_content_callback_t *callback, - parse_content_param_callback_t *param_cb, - void *context) -{ - struct rfc822_parser_context parser; - string_t *str; - size_t key_len; - bool quoted_string; - - rfc822_parser_init(&parser, data, size, NULL); - - t_push(); - str = t_str_new(256); - - /* get content type */ - (void)rfc822_skip_lwsp(&parser); - if (rfc822_parse_mime_token(&parser, str) > 0) { - if (*parser.data == '/') { - parser.data++; - str_append_c(str, '/'); - (void)rfc822_parse_mime_token(&parser, str); - } - } - - if (callback != NULL) - callback(str_data(str), str_len(str), context); - - if (param_cb == NULL) { - /* we don't care about parameters */ - t_pop(); - return; - } - - while (parser.data != parser.end && *parser.data == ';') { - parser.data++; - (void)rfc822_skip_lwsp(&parser); - - str_truncate(str, 0); - if (rfc822_parse_mime_token(&parser, str) <= 0) - break; - - /* <token> "=" <token> | <quoted-string> */ - if (str_len(str) == 0 || *parser.data != '=') - break; - parser.data++; - if (rfc822_skip_lwsp(&parser) <= 0) - break; - - quoted_string = parser.data != parser.end && - *parser.data == '"'; - key_len = str_len(str); - if (quoted_string) { - if (rfc822_parse_quoted_string(&parser, str) < 0) - break; - } else { - if (rfc822_parse_mime_token(&parser, str) < 0) - break; - } - - param_cb(str_data(str), key_len, - str_data(str) + key_len, str_len(str) - key_len, - quoted_string, context); - - str_truncate(str, 0); - } - t_pop(); -}
--- a/src/lib-mail/message-content-parser.h Fri Jul 20 08:28:26 2007 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#ifndef __MESSAGE_CONTENT_PARSER_H -#define __MESSAGE_CONTENT_PARSER_H - -/* NOTE: name and value aren't \0-terminated. */ -typedef void parse_content_callback_t(const unsigned char *value, - size_t value_len, void *context); -typedef void parse_content_param_callback_t(const unsigned char *name, - size_t name_len, - const unsigned char *value, - size_t value_len, - bool value_quoted, void *context); - -extern parse_content_callback_t *null_parse_content_callback; -extern parse_content_param_callback_t *null_parse_content_param_callback; - -void message_content_parse_header(const unsigned char *data, size_t size, - parse_content_callback_t *callback, - parse_content_param_callback_t *param_cb, - void *context); - -#endif
--- a/src/lib-mail/message-decoder.c Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/message-decoder.c Fri Jul 20 09:42:25 2007 +0300 @@ -2,12 +2,12 @@ #include "lib.h" #include "buffer.h" -#include "strescape.h" #include "base64.h" +#include "str.h" #include "charset-utf8.h" #include "quoted-printable.h" +#include "rfc822-parser.h" #include "message-parser.h" -#include "message-content-parser.h" #include "message-header-decode.h" #include "message-decoder.h" @@ -102,46 +102,68 @@ return TRUE; } -static void parse_content_encoding(const unsigned char *value, size_t value_len, - void *context) +static void +parse_content_transfer_encoding(struct message_decoder_context *ctx, + struct message_header_line *hdr) { - struct message_decoder_context *ctx = context; + struct rfc822_parser_context parser; + string_t *value; - ctx->content_type = CONTENT_TYPE_UNKNOWN; + t_push(); + value = t_str_new(64); + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); - switch (value_len) { + (void)rfc822_skip_lwsp(&parser); + (void)rfc822_parse_mime_token(&parser, value); + + switch (str_len(value)) { case 4: - if (memcasecmp(value, "7bit", 4) == 0 || - memcasecmp(value, "8bit", 4) == 0) + if (memcasecmp(str_data(value), "7bit", 4) == 0 || + memcasecmp(str_data(value), "8bit", 4) == 0) ctx->content_type = CONTENT_TYPE_BINARY; break; case 6: - if (memcasecmp(value, "base64", 6) == 0) + if (memcasecmp(str_data(value), "base64", 6) == 0) ctx->content_type = CONTENT_TYPE_BASE64; - else if (memcasecmp(value, "binary", 6) == 0) + else if (memcasecmp(str_data(value), "binary", 6) == 0) ctx->content_type = CONTENT_TYPE_BINARY; break; case 16: - if (memcasecmp(value, "quoted-printable", 16) == 0) + if (memcasecmp(str_data(value), "quoted-printable", 16) == 0) ctx->content_type = CONTENT_TYPE_QP; break; } + t_pop(); } static void -parse_content_type_param(const unsigned char *name, size_t name_len, - const unsigned char *value, size_t value_len, - bool value_quoted, void 
*context) +parse_content_type(struct message_decoder_context *ctx, + struct message_header_line *hdr) { - struct message_decoder_context *ctx = context; + struct rfc822_parser_context parser; + const char *key, *value; + string_t *str; + + if (ctx->content_charset != NULL) + return; - if (name_len == 7 && memcasecmp(name, "charset", 7) == 0 && - ctx->content_charset == NULL) { - ctx->content_charset = i_strndup(value, value_len); - if (value_quoted) str_unescape(ctx->content_charset); + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); + t_push(); + str = t_str_new(64); + if (rfc822_parse_content_type(&parser, str) <= 0) { + t_pop(); + return; + } - ctx->charset_utf8 = charset_is_utf8(ctx->content_charset); + while (rfc822_parse_content_param(&parser, &key, &value) > 0) { + if (strcasecmp(key, "charset") == 0) { + ctx->content_charset = i_strdup(value); + ctx->charset_utf8 = charset_is_utf8(value); + break; + } } + t_pop(); } static bool message_decode_header(struct message_decoder_context *ctx, @@ -156,20 +178,11 @@ } if (hdr->name_len == 12 && - strcasecmp(hdr->name, "Content-Type") == 0) { - message_content_parse_header(hdr->full_value, - hdr->full_value_len, - null_parse_content_callback, - parse_content_type_param, ctx); - } + strcasecmp(hdr->name, "Content-Type") == 0) + parse_content_type(ctx, hdr); if (hdr->name_len == 25 && - strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0) { - message_content_parse_header(hdr->full_value, - hdr->full_value_len, - parse_content_encoding, - null_parse_content_param_callback, - ctx); - } + strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0) + parse_content_transfer_encoding(ctx, hdr); buffer_set_used_size(ctx->buf, 0); message_header_decode(hdr->full_value, hdr->full_value_len,
--- a/src/lib-mail/message-parser.c Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/message-parser.c Fri Jul 20 09:42:25 2007 +0300 @@ -1,9 +1,9 @@ /* Copyright (C) 2002-2006 Timo Sirainen */ #include "lib.h" +#include "str.h" #include "istream.h" -#include "strescape.h" -#include "message-content-parser.h" +#include "rfc822-parser.h" #include "message-parser.h" /* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. @@ -388,49 +388,46 @@ return 1; } -static void -parse_content_type(const unsigned char *value, size_t value_len, void *context) +static void parse_content_type(struct message_parser_ctx *ctx, + struct message_header_line *hdr) { - struct message_parser_ctx *ctx = context; - const char *str; + struct rfc822_parser_context parser; + const char *key, *value; + string_t *content_type; - if (ctx->part_seen_content_type || value_len == 0) + if (ctx->part_seen_content_type) return; ctx->part_seen_content_type = TRUE; - t_push(); - str = t_strndup(value, value_len); - if (strcasecmp(str, "message/rfc822") == 0) + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); + + content_type = t_str_new(64); + if (rfc822_parse_content_type(&parser, content_type) < 0) + return; + + if (strcasecmp(str_c(content_type), "message/rfc822") == 0) ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822; - else if (strncasecmp(str, "text", 4) == 0 && - (str[4] == '/' || str[4] == '\0')) + else if (strncasecmp(str_c(content_type), "text", 4) == 0 && + (str_len(content_type) == 4 || + str_data(content_type)[4] == '/')) ctx->part->flags |= MESSAGE_PART_FLAG_TEXT; - else if (strncasecmp(str, "multipart/", 10) == 0) { + else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) { ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART; - if (strcasecmp(str+10, "digest") == 0) + if (strcasecmp(str_c(content_type)+10, "digest") == 0) ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST; } - t_pop(); -} - 
-static void -parse_content_type_param(const unsigned char *name, size_t name_len, - const unsigned char *value, size_t value_len, - bool value_quoted, void *context) -{ - struct message_parser_ctx *ctx = context; - char *boundary; if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || - name_len != 8 || memcasecmp(name, "boundary", 8) != 0) + ctx->last_boundary != NULL) return; - if (ctx->last_boundary == NULL) { - boundary = p_strndup(ctx->parser_pool, value, value_len); - if (value_quoted) - str_unescape(boundary); - ctx->last_boundary = boundary; + while (rfc822_parse_content_param(&parser, &key, &value) > 0) { + if (strcasecmp(key, "boundary") == 0) { + ctx->last_boundary = p_strdup(ctx->parser_pool, value); + break; + } } } @@ -463,10 +460,9 @@ if (hdr->continues) hdr->use_full_value = TRUE; else { - message_content_parse_header(hdr->full_value, - hdr->full_value_len, - parse_content_type, - parse_content_type_param, ctx); + t_push(); + parse_content_type(ctx, hdr); + t_pop(); } }
--- a/src/lib-mail/message-search.c Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/message-search.c Fri Jul 20 09:42:25 2007 +0300 @@ -3,11 +3,12 @@ #include "lib.h" #include "buffer.h" #include "istream.h" +#include "str.h" #include "str-find.h" #include "charset-utf8.h" +#include "rfc822-parser.h" #include "message-decoder.h" #include "message-parser.h" -#include "message-content-parser.h" #include "message-search.h" struct message_search_context { @@ -25,20 +26,6 @@ unsigned int content_type_text:1; /* text/any or message/any */ }; -static void parse_content_type(const unsigned char *value, size_t value_len, - void *context) -{ - struct message_search_context *ctx = context; - const char *str; - - t_push(); - str = t_strndup(value, value_len); - ctx->content_type_text = - strncasecmp(str, "text/", 5) == 0 || - strncasecmp(str, "message/", 8) == 0; - t_pop(); -} - int message_search_init(pool_t pool, const char *key, const char *charset, enum message_search_flags flags, struct message_search_context **ctx_r) @@ -81,6 +68,25 @@ p_free(ctx->pool, ctx); } +static void parse_content_type(struct message_search_context *ctx, + struct message_header_line *hdr) +{ + struct rfc822_parser_context parser; + string_t *content_type; + + t_push(); + rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); + (void)rfc822_skip_lwsp(&parser); + + content_type = t_str_new(64); + if (rfc822_parse_content_type(&parser, content_type) >= 0) { + ctx->content_type_text = + strncasecmp(str_c(content_type), "text/", 5) == 0 || + strncasecmp(str_c(content_type), "message/", 8) == 0; + } + t_pop(); +} + static void handle_header(struct message_search_context *ctx, struct message_header_line *hdr) { @@ -90,9 +96,7 @@ hdr->use_full_value = TRUE; return; } - message_content_parse_header(hdr->full_value, - hdr->full_value_len, - parse_content_type, NULL, ctx); + parse_content_type(ctx, hdr); } }
--- a/src/lib-mail/rfc822-parser.c Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/rfc822-parser.c Fri Jul 20 09:42:25 2007 +0300 @@ -2,6 +2,7 @@ #include "lib.h" #include "str.h" +#include "strescape.h" #include "rfc822-parser.h" /* @@ -334,3 +335,72 @@ else return rfc822_parse_dot_atom(ctx, str); } + +int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str) +{ + if (rfc822_skip_lwsp(ctx) <= 0) + return -1; + + /* get main type */ + if (rfc822_parse_mime_token(ctx, str) <= 0) + return -1; + + /* skip over "/" */ + if (*ctx->data != '/') + return -1; + ctx->data++; + if (rfc822_skip_lwsp(ctx) <= 0) + return -1; + str_append_c(str, '/'); + + /* get subtype */ + return rfc822_parse_mime_token(ctx, str); +} + +int rfc822_parse_content_param(struct rfc822_parser_context *ctx, + const char **key_r, const char **value_r) +{ + string_t *tmp; + size_t value_pos; + int ret; + + /* .. := *(";" parameter) + parameter := attribute "=" value + attribute := token + value := token / quoted-string + */ + *key_r = NULL; + *value_r = NULL; + + if (ctx->data == ctx->end) + return 0; + if (*ctx->data != ';') + return -1; + ctx->data++; + + if (rfc822_skip_lwsp(ctx) <= 0) + return -1; + + tmp = t_str_new(64); + if (rfc822_parse_mime_token(ctx, tmp) <= 0) + return -1; + str_append_c(tmp, '\0'); + value_pos = str_len(tmp); + + if (*ctx->data != '=') + return -1; + ctx->data++; + + if ((ret = rfc822_skip_lwsp(ctx)) <= 0) { + /* broken / no value */ + } else if (*ctx->data == '"') { + ret = rfc822_parse_quoted_string(ctx, tmp); + str_unescape(str_c_modifiable(tmp) + value_pos); + } else { + ret = rfc822_parse_mime_token(ctx, tmp); + } + + *key_r = str_c(tmp); + *value_r = *key_r + value_pos; + return ret < 0 ? -1 : 1; +}
--- a/src/lib-mail/rfc822-parser.h Fri Jul 20 08:28:26 2007 +0300 +++ b/src/lib-mail/rfc822-parser.h Fri Jul 20 09:42:25 2007 +0300 @@ -36,4 +36,12 @@ /* dot-atom / domain-literal */ int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str); +/* Parse Content-Type header's type/subtype. */ +int rfc822_parse_content_type(struct rfc822_parser_context *ctx, string_t *str); +/* For Content-Type style parameter parsing. Expect ";" key "=" value. + value is unescaped if needed. The returned strings are allocated from data + stack. Returns 1 = key/value set, 0 = no more data, -1 = invalid input. */ +int rfc822_parse_content_param(struct rfc822_parser_context *ctx, + const char **key_r, const char **value_r); + #endif