Mercurial > dovecot > core-2.2
changeset 18504:a9f8a617dc02
lib-mail: Added qp-decoder, which is a rewritten quoted_printable_decode()
The main benefit is that qp-decoder allows feeding data to it in smaller
pieces. It can also give better error reporting.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 03 May 2015 14:50:01 +0300 |
parents | 333533e2d231 |
children | a8e9fdcb17c5 |
files | src/lib-mail/Makefile.am src/lib-mail/qp-decoder.c src/lib-mail/qp-decoder.h src/lib-mail/test-qp-decoder.c |
diffstat | 4 files changed, 406 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-mail/Makefile.am	Wed Apr 29 17:45:30 2015 +0200
+++ b/src/lib-mail/Makefile.am	Sun May 03 14:50:01 2015 +0300
@@ -31,6 +31,7 @@
 	message-size.c \
 	message-snippet.c \
 	ostream-dot.c \
+	qp-decoder.c \
 	quoted-printable.c \
 	rfc2231-parser.c \
 	rfc822-parser.c
@@ -65,6 +66,7 @@
 	message-size.h \
 	message-snippet.h \
 	ostream-dot.h \
+	qp-decoder.h \
 	quoted-printable.h \
 	rfc2231-parser.h \
 	rfc822-parser.h
@@ -91,6 +93,7 @@
 	test-message-part \
 	test-message-snippet \
 	test-ostream-dot \
+	test-qp-decoder \
 	test-quoted-printable \
 	test-rfc2231-parser
 
@@ -181,6 +184,10 @@
 test_ostream_dot_LDADD = ostream-dot.lo $(test_libs)
 test_ostream_dot_DEPENDENCIES = $(test_deps)
 
+test_qp_decoder_SOURCES = test-qp-decoder.c
+test_qp_decoder_LDADD = qp-decoder.lo $(test_libs)
+test_qp_decoder_DEPENDENCIES = $(test_deps)
+
 test_quoted_printable_SOURCES = test-quoted-printable.c
 test_quoted_printable_LDADD = quoted-printable.lo $(test_libs)
 test_quoted_printable_DEPENDENCIES = $(test_deps)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-mail/qp-decoder.c Sun May 03 14:50:01 2015 +0300 @@ -0,0 +1,285 @@ +/* Copyright (c) 2002-2015 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "buffer.h" +#include "hex-binary.h" +#include "qp-decoder.h" + +/* quoted-printable lines can be max 76 characters. if we've seen more than + that much whitespace, it means there really shouldn't be anything else left + in the line except trailing whitespace. */ +#define QP_MAX_WHITESPACE_LEN 76 + +#define QP_IS_TRAILING_WHITESPACE(c) \ + ((c) == ' ' || (c) == '\t') + +enum qp_state { + STATE_TEXT = 0, + STATE_WHITESPACE, + STATE_EQUALS, + STATE_EQUALS_WHITESPACE, + STATE_HEX2, + STATE_CR, + STATE_SOFTCR +}; + +struct qp_decoder { + buffer_t *dest; + buffer_t *whitespace; + enum qp_state state; + char hexchar; +}; + +struct qp_decoder *qp_decoder_init(buffer_t *dest) +{ + struct qp_decoder *qp; + + qp = i_new(struct qp_decoder, 1); + qp->dest = dest; + qp->whitespace = buffer_create_dynamic(default_pool, 80); + return qp; +} + +void qp_decoder_deinit(struct qp_decoder **_qp) +{ + struct qp_decoder *qp = *_qp; + + buffer_free(&qp->whitespace); + i_free(qp); +} + +static size_t +qp_decoder_more_text(struct qp_decoder *qp, const unsigned char *src, + size_t src_size) +{ + size_t i, start = 0, ret = src_size; + + for (i = 0; i < src_size; i++) { + if (src[i] > '=') { + /* fast path */ + continue; + } + switch (src[i]) { + case '=': + qp->state = STATE_EQUALS; + break; + case '\r': + qp->state = STATE_CR; + break; + case '\n': + /* LF without preceding CR */ + buffer_append(qp->dest, src+start, i-start); + buffer_append(qp->dest, "\r\n", 2); + start = i+1; + continue; + case ' ': + case '\t': + i_assert(qp->whitespace->used == 0); + qp->state = STATE_WHITESPACE; + buffer_append_c(qp->whitespace, src[i]); + break; + default: + continue; + } + ret = i+1; + break; + } + buffer_append(qp->dest, src+start, i-start); + return ret; +} + +static 
void qp_decoder_invalid(struct qp_decoder *qp, const char **error_r) +{ + switch (qp->state) { + case STATE_EQUALS: + buffer_append_c(qp->dest, '='); + *error_r = "'=' not followed by two hex digits"; + break; + case STATE_HEX2: + buffer_append_c(qp->dest, '='); + buffer_append_c(qp->dest, qp->hexchar); + *error_r = "'=<hex>' not followed by a hex digit"; + break; + case STATE_EQUALS_WHITESPACE: + buffer_append_c(qp->dest, '='); + buffer_append_buf(qp->dest, qp->whitespace, 0, (size_t)-1); + buffer_set_used_size(qp->whitespace, 0); + *error_r = "'=<whitespace>' not followed by newline"; + break; + case STATE_CR: + buffer_append_buf(qp->dest, qp->whitespace, 0, (size_t)-1); + buffer_set_used_size(qp->whitespace, 0); + buffer_append_c(qp->dest, '\r'); + *error_r = "CR not followed by LF"; + break; + case STATE_SOFTCR: + buffer_append_c(qp->dest, '='); + buffer_append_buf(qp->dest, qp->whitespace, 0, (size_t)-1); + buffer_set_used_size(qp->whitespace, 0); + buffer_append_c(qp->dest, '\r'); + *error_r = "CR not followed by LF"; + break; + case STATE_TEXT: + case STATE_WHITESPACE: + i_unreached(); + } + qp->state = STATE_TEXT; + i_assert(*error_r != NULL); +} + +int qp_decoder_more(struct qp_decoder *qp, const unsigned char *src, + size_t src_size, size_t *invalid_src_pos_r, + const char **error_r) +{ + const char *error; + size_t i; + + *invalid_src_pos_r = (size_t)-1; + *error_r = NULL; + + for (i = 0; i < src_size; ) { + switch (qp->state) { + case STATE_TEXT: + i += qp_decoder_more_text(qp, src+i, src_size-i); + /* don't increment i any more than we already did, + so continue instead of break */ + continue; + case STATE_WHITESPACE: + if (QP_IS_TRAILING_WHITESPACE(src[i])) { + /* more whitespace */ + if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN) + buffer_append_c(qp->whitespace, src[i]); + } else if (src[i] == '\r') { + qp->state = STATE_CR; + } else if (src[i] == '\n') { + /* drop the trailing whitespace */ + buffer_append(qp->dest, "\r\n", 2); + 
buffer_set_used_size(qp->whitespace, 0); + } else { + /* this wasn't trailing whitespace. + put it back. */ + buffer_append_buf(qp->dest, qp->whitespace, + 0, (size_t)-1); + if (qp->whitespace->used > QP_MAX_WHITESPACE_LEN) { + /* we already truncated some of the + whitespace away, because the line + is too long */ + if (*invalid_src_pos_r == (size_t)-1) { + *invalid_src_pos_r = i; + *error_r = "Too much whitespace"; + } + } + buffer_set_used_size(qp->whitespace, 0); + qp->state = STATE_TEXT; + continue; /* don't increment i */ + } + break; + case STATE_EQUALS: + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'A' && src[i] <= 'F') || + /* lowercase hex isn't strictly valid, but allow */ + (src[i] >= 'a' && src[i] <= 'f')) { + qp->hexchar = src[i]; + qp->state = STATE_HEX2; + } else if (QP_IS_TRAILING_WHITESPACE(src[i])) { + i_assert(qp->whitespace->used == 0); + buffer_append_c(qp->whitespace, src[i]); + qp->state = STATE_EQUALS_WHITESPACE; + } else if (src[i] == '\r') + qp->state = STATE_SOFTCR; + else if (src[i] == '\n') { + qp->state = STATE_TEXT; + } else { + /* invalid input */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == (size_t)-1) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_HEX2: + if ((src[i] >= '0' && src[i] <= '9') || + (src[i] >= 'A' && src[i] <= 'F') || + (src[i] >= 'a' && src[i] <= 'f')) { + char data[3]; + + data[0] = qp->hexchar; + data[1] = src[i]; + data[2] = '\0'; + if (hex_to_binary(data, qp->dest) < 0) + i_unreached(); + qp->state = STATE_TEXT; + } else { + /* invalid input */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == (size_t)-1) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_EQUALS_WHITESPACE: + if (QP_IS_TRAILING_WHITESPACE(src[i])) { + if (qp->whitespace->used <= QP_MAX_WHITESPACE_LEN) + buffer_append_c(qp->whitespace, src[i]); + else { + /* if this isn't going to 
get truncated + anyway, it's going to be an error */ + } + } else if (src[i] == '\r') + qp->state = STATE_SOFTCR; + else if (src[i] == '\n') { + buffer_set_used_size(qp->whitespace, 0); + qp->state = STATE_TEXT; + } else { + /* =<whitespace> not followed by [CR]LF + is invalid. */ + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == (size_t)-1) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + case STATE_CR: + case STATE_SOFTCR: + if (src[i] == '\n') { + buffer_set_used_size(qp->whitespace, 0); + if (qp->state != STATE_SOFTCR) + buffer_append(qp->dest, "\r\n", 2); + qp->state = STATE_TEXT; + } else { + qp_decoder_invalid(qp, &error); + if (*invalid_src_pos_r == (size_t)-1) { + *invalid_src_pos_r = i; + *error_r = error; + } + continue; /* don't increment i */ + } + break; + } + i++; + } + i_assert((*invalid_src_pos_r == (size_t)-1) == (*error_r == NULL)); + return *invalid_src_pos_r == (size_t)-1 ? 0 : -1; +} + +int qp_decoder_finish(struct qp_decoder *qp, const char **error_r) +{ + int ret; + + if (qp->state == STATE_TEXT || qp->state == STATE_WHITESPACE) { + ret = 0; + *error_r = NULL; + } else { + qp_decoder_invalid(qp, error_r); + ret = -1; + } + qp->state = STATE_TEXT; + buffer_set_used_size(qp->whitespace, 0); + return ret; +}
/* src/lib-mail/qp-decoder.h (new file in this changeset) */
#ifndef QP_DECODER_H
#define QP_DECODER_H

/* Initialize quoted-printable decoder. Write all the decoded output to dest.
   The decoder only appends to dest; it never frees it. */
struct qp_decoder *qp_decoder_init(buffer_t *dest);
/* Free the decoder. The dest buffer given to qp_decoder_init() is left
   untouched. */
void qp_decoder_deinit(struct qp_decoder **qp);

/* Translate more quoted printable data into binary. Returns 0 if input was
   valid, -1 if there were some decoding errors (which were skipped over).
   LFs without preceding CR are returned as CRLF (but =0A isn't).

   On success *invalid_src_pos_r is set to (size_t)-1 and *error_r to NULL.
   On error *invalid_src_pos_r is the position in src where the first problem
   was noticed and *error_r is a constant string describing it. Invalid input
   is written to the output as-is. The input may be fed in pieces of any
   size; the decoder keeps its state between calls. */
int qp_decoder_more(struct qp_decoder *qp, const unsigned char *src,
		    size_t src_size, size_t *invalid_src_pos_r,
		    const char **error_r);
/* Finish decoding any pending input. Returns the same as qp_decoder_more().
   This function also resets the entire decoder state, so the same decoder can
   be used to decode more data if wanted. *error_r is set to NULL on success
   and to the error description on failure. */
int qp_decoder_finish(struct qp_decoder *qp, const char **error_r);

#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-mail/test-qp-decoder.c Sun May 03 14:50:01 2015 +0300 @@ -0,0 +1,95 @@ +/* Copyright (c) 2007-2015 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "qp-decoder.h" +#include "test-common.h" + +struct test_quoted_printable_decode_data { + const char *input; + const char *output; + size_t error_pos; + int ret; +}; + +static void test_qp_decoder(void) +{ +#define WHITESPACE10 " \t \t \t" +#define WHITESPACE70 WHITESPACE10 WHITESPACE10 WHITESPACE10 WHITESPACE10 WHITESPACE10 WHITESPACE10 WHITESPACE10 + static struct test_quoted_printable_decode_data tests[] = { + { "foo \r\nbar=\n", "foo\r\nbar", 0, 0 }, + { "foo\t=\nbar", "foo\tbar", 0, 0 }, + { "foo = \n=01", "foo \001", 0, 0 }, + { "foo =\t\r\nbar", "foo bar", 0, 0 }, + { "foo =\r\n=01", "foo \001", 0, 0 }, + { "foo \nbar=\r\n", "foo\r\nbar", 0, 0 }, + { "=0A=0D ", "\n\r", 0, 0 }, + { "foo_bar", "foo_bar", 0, 0 }, + { "\n\n", "\r\n\r\n", 0, 0 }, + { "\r\n\n\n\r\n", "\r\n\r\n\r\n\r\n", 0, 0 }, + + { "foo=", "foo=", 4, -1 }, + { "foo= \t", "foo= \t", 6, -1 }, + { "foo= \r", "foo= \r", 6, -1 }, + { "foo= \r bar", "foo= \r bar", 6, -1 }, + { "foo=A", "foo=A", 5, -1 }, + { "foo=Ax", "foo=Ax", 5, -1 }, + { "foo=Ax=xy", "foo=Ax=xy", 5, -1 }, + + /* above 76 whitespaces is invalid and gets truncated + (at 77th whitespace because of the current implementation) */ + { WHITESPACE70" 7\n", WHITESPACE70" 7\r\n", 0, 0 }, + { WHITESPACE70" 8\n", WHITESPACE70" 8\r\n", 77, -1 }, + { WHITESPACE70" 9\n", WHITESPACE70" 9\r\n", 78, -1 }, + { WHITESPACE70" 0\n", WHITESPACE70" 0\r\n", 79, -1 } + }; + string_t *str; + unsigned int i, j; + + test_begin("qp-decoder"); + str = t_str_new(128); + for (i = 0; i < N_ELEMENTS(tests); i++) { + const char *input = tests[i].input; + struct qp_decoder *qp = qp_decoder_init(str); + size_t error_pos; + const char *error; + int ret; + + /* try all at once */ + ret = qp_decoder_more(qp, (const void 
*)input, strlen(input), + &error_pos, &error); + if (qp_decoder_finish(qp, &error) < 0 && ret == 0) { + error_pos = strlen(input); + ret = -1; + } + test_assert_idx(ret == tests[i].ret, i); + test_assert_idx(ret == 0 || error_pos == tests[i].error_pos, i); + test_assert_idx(strcmp(str_c(str), tests[i].output) == 0, i); + + /* try in small pieces */ + str_truncate(str, 0); + ret = 0; + for (j = 0; input[j] != '\0'; j++) { + unsigned char c = input[j]; + if (qp_decoder_more(qp, &c, 1, &error_pos, &error) < 0) + ret = -1; + } + if (qp_decoder_finish(qp, &error) < 0) + ret = -1; + test_assert_idx(ret == tests[i].ret, i); + test_assert_idx(strcmp(str_c(str), tests[i].output) == 0, i); + + qp_decoder_deinit(&qp); + str_truncate(str, 0); + } + test_end(); +} + +int main(void) +{ + static void (*test_functions[])(void) = { + test_qp_decoder, + NULL + }; + return test_run(test_functions); +}