Mercurial > dovecot > original-hg > dovecot-1.2
view src/lib-mail/message-decoder.c @ 6414:a6a49d5efc59 HEAD
Changed buffer_free() and buffer_free_without_data() APIs to take ** pointer
which is set to NULL instead of hiding it with a macro.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 16 Sep 2007 12:09:30 +0300 |
parents | fd9f6b4d846e |
children | 13b0a3ba5f7b |
line wrap: on
line source
/* Copyright (C) 2006 Timo Sirainen */ #include "lib.h" #include "buffer.h" #include "base64.h" #include "str.h" #include "unichar.h" #include "charset-utf8.h" #include "quoted-printable.h" #include "rfc822-parser.h" #include "message-parser.h" #include "message-header-decode.h" #include "message-decoder.h" enum content_type { CONTENT_TYPE_UNKNOWN = 0, CONTENT_TYPE_BINARY, CONTENT_TYPE_QP, CONTENT_TYPE_BASE64 }; /* base64 takes max 4 bytes per character, q-p takes max 3. */ #define MAX_ENCODING_BUF_SIZE 3 /* UTF-8 takes max 5 bytes per character. Not sure about others, but I'd think 10 is more than enough for everyone.. */ #define MAX_TRANSLATION_BUF_SIZE 10 struct message_decoder_context { struct message_part *prev_part; struct message_header_line hdr; buffer_t *buf, *buf2; struct charset_translation *charset_trans; char translation_buf[MAX_TRANSLATION_BUF_SIZE]; unsigned int translation_size; char encoding_buf[MAX_ENCODING_BUF_SIZE]; unsigned int encoding_size; char *content_charset; enum content_type content_type; unsigned int dtcase:1; unsigned int charset_utf8:1; }; struct message_decoder_context *message_decoder_init(bool dtcase) { struct message_decoder_context *ctx; ctx = i_new(struct message_decoder_context, 1); ctx->dtcase = dtcase; ctx->buf = buffer_create_dynamic(default_pool, 8192); ctx->buf2 = buffer_create_dynamic(default_pool, 8192); return ctx; } void message_decoder_deinit(struct message_decoder_context **_ctx) { struct message_decoder_context *ctx = *_ctx; *_ctx = NULL; if (ctx->charset_trans != NULL) charset_to_utf8_end(&ctx->charset_trans); buffer_free(&ctx->buf); buffer_free(&ctx->buf2); i_free(ctx->content_charset); i_free(ctx); } static void parse_content_transfer_encoding(struct message_decoder_context *ctx, struct message_header_line *hdr) { struct rfc822_parser_context parser; string_t *value; t_push(); value = t_str_new(64); rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); (void)rfc822_skip_lwsp(&parser); (void)rfc822_parse_mime_token(&parser, value); switch (str_len(value)) { case 4: if (memcasecmp(str_data(value), "7bit", 4) == 0 || memcasecmp(str_data(value), "8bit", 4) == 0) ctx->content_type = CONTENT_TYPE_BINARY; break; case 6: if (memcasecmp(str_data(value), "base64", 6) == 0) ctx->content_type = CONTENT_TYPE_BASE64; else if (memcasecmp(str_data(value), "binary", 6) == 0) ctx->content_type = CONTENT_TYPE_BINARY; break; case 16: if (memcasecmp(str_data(value), "quoted-printable", 16) == 0) ctx->content_type = CONTENT_TYPE_QP; break; } t_pop(); } static void parse_content_type(struct message_decoder_context *ctx, struct message_header_line *hdr) { struct rfc822_parser_context parser; const char *key, *value; string_t *str; if (ctx->content_charset != NULL) return; rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL); (void)rfc822_skip_lwsp(&parser); t_push(); str = t_str_new(64); if (rfc822_parse_content_type(&parser, str) <= 0) { t_pop(); return; } while (rfc822_parse_content_param(&parser, &key, &value) > 0) { if (strcasecmp(key, "charset") == 0) { ctx->content_charset = i_strdup(value); ctx->charset_utf8 = charset_is_utf8(value); break; } } t_pop(); } static bool message_decode_header(struct message_decoder_context *ctx, struct message_header_line *hdr, struct message_block *output) { size_t value_len; if (hdr->continues) { hdr->use_full_value = TRUE; return FALSE; } if (hdr->name_len == 12 && strcasecmp(hdr->name, "Content-Type") == 0) parse_content_type(ctx, hdr); if (hdr->name_len == 25 && strcasecmp(hdr->name, "Content-Transfer-Encoding") == 0) parse_content_transfer_encoding(ctx, hdr); buffer_set_used_size(ctx->buf, 0); message_header_decode_utf8(hdr->full_value, hdr->full_value_len, ctx->buf, ctx->dtcase); value_len = ctx->buf->used; if (ctx->dtcase) { (void)uni_utf8_to_decomposed_titlecase(hdr->name, hdr->name_len, ctx->buf); buffer_append_c(ctx->buf, '\0'); } ctx->hdr = *hdr; ctx->hdr.full_value = ctx->buf->data; ctx->hdr.full_value_len = value_len; ctx->hdr.value_len = 0; if (ctx->dtcase) { ctx->hdr.name = CONST_PTR_OFFSET(ctx->buf->data, ctx->hdr.full_value_len); ctx->hdr.name_len = ctx->buf->used - 1 - value_len; } output->hdr = &ctx->hdr; return TRUE; } static void translation_buf_decode(struct message_decoder_context *ctx, const unsigned char **data, size_t *size) { unsigned char trans_buf[MAX_TRANSLATION_BUF_SIZE+1]; size_t pos, skip; /* @UNSAFE */ memcpy(trans_buf, ctx->translation_buf, ctx->translation_size); skip = sizeof(trans_buf) - ctx->translation_size; if (skip > *size) skip = *size; memcpy(trans_buf + ctx->translation_size, data, skip); pos = *size; (void)charset_to_utf8(ctx->charset_trans, *data, &pos, ctx->buf2); i_assert(pos > ctx->translation_size); skip = (ctx->translation_size + skip) - pos; i_assert(*size >= skip); *data += skip; *size -= skip; ctx->translation_size = 0; } static bool message_decode_body(struct message_decoder_context *ctx, struct message_block *input, struct message_block *output) { unsigned char new_buf[MAX_ENCODING_BUF_SIZE+1]; const unsigned char *data = NULL; size_t pos, size = 0, skip = 0; int ret; if (ctx->charset_trans == NULL && !ctx->charset_utf8) { if (charset_to_utf8_begin(ctx->content_charset != NULL ? ctx->content_charset : "UTF-8", ctx->dtcase, &ctx->charset_trans) < 0) ctx->charset_trans = NULL; } if (ctx->encoding_size != 0) { /* @UNSAFE */ memcpy(new_buf, ctx->encoding_buf, ctx->encoding_size); skip = sizeof(new_buf) - ctx->encoding_size; if (skip > input->size) skip = input->size; memcpy(new_buf + ctx->encoding_size, input->data, skip); } switch (ctx->content_type) { case CONTENT_TYPE_UNKNOWN: /* just skip this body */ return FALSE; case CONTENT_TYPE_BINARY: data = input->data; size = pos = input->size; break; case CONTENT_TYPE_QP: buffer_set_used_size(ctx->buf, 0); if (ctx->encoding_size != 0) { quoted_printable_decode(new_buf, ctx->encoding_size + skip, &pos, ctx->buf); i_assert(pos >= ctx->encoding_size); skip = pos - ctx->encoding_size; } quoted_printable_decode(input->data + skip, input->size - skip, &pos, ctx->buf); pos += skip; data = ctx->buf->data; size = ctx->buf->used; break; case CONTENT_TYPE_BASE64: buffer_set_used_size(ctx->buf, 0); if (ctx->encoding_size != 0) { if (base64_decode(new_buf, ctx->encoding_size + skip, &pos, ctx->buf) < 0) { /* corrupted base64 data, don't bother with the rest of it */ return FALSE; } i_assert(pos >= ctx->encoding_size); skip = pos - ctx->encoding_size; } ret = base64_decode(input->data + skip, input->size - skip, &pos, ctx->buf); if (ret < 0) { /* corrupted base64 data, don't bother with the rest of it */ return FALSE; } if (ret == 0) { /* end of base64 input */ pos = input->size - skip; } pos += skip; data = ctx->buf->data; size = ctx->buf->used; break; } if (pos != input->size) { /* @UNSAFE */ i_assert(pos < input->size); ctx->encoding_size = input->size - pos; i_assert(ctx->encoding_size <= sizeof(ctx->encoding_buf)); memcpy(ctx->encoding_buf, input->data + pos, ctx->encoding_size); } else { ctx->encoding_size = 0; } if (ctx->charset_utf8) { if (ctx->dtcase) { buffer_set_used_size(ctx->buf2, 0); (void)uni_utf8_to_decomposed_titlecase(data, size, ctx->buf); output->data = ctx->buf2->data; output->size = ctx->buf2->used; } else { output->data = data; output->size = size; } } else if (ctx->charset_trans == NULL) { output->data = data; output->size = size; } else { buffer_set_used_size(ctx->buf2, 0); if (ctx->translation_size != 0) translation_buf_decode(ctx, &data, &size); pos = size; (void)charset_to_utf8(ctx->charset_trans, data, &pos, ctx->buf2); if (pos != size) { ctx->translation_size = size - pos; i_assert(ctx->translation_size <= sizeof(ctx->translation_buf)); memcpy(ctx->translation_buf, data + pos, ctx->translation_size); } output->data = ctx->buf2->data; output->size = ctx->buf2->used; } output->hdr = NULL; return TRUE; } bool message_decoder_decode_next_block(struct message_decoder_context *ctx, struct message_block *input, struct message_block *output) { if (input->part != ctx->prev_part) { /* MIME part changed. */ i_free_and_null(ctx->content_charset); ctx->content_type = CONTENT_TYPE_BINARY; ctx->charset_utf8 = TRUE; ctx->encoding_size = 0; } output->part = input->part; ctx->prev_part = input->part; if (input->hdr != NULL) return message_decode_header(ctx, input->hdr, output); else return message_decode_body(ctx, input, output); }