Mercurial > dovecot > core-2.2
view src/lib-mail/message-header-encode.c @ 19552:0f22db71df7a
global: freshen copyright
git ls-files | xargs perl -p -i -e 's/(\d+)-201[0-5]/$1-2016/g;s/ (201[0-5]) Dovecot/ $1-2016 Dovecot/'
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Wed, 13 Jan 2016 12:24:03 +0200 |
parents | 3009a1a6f6d5 |
children | 2e2563132d5f |
line wrap: on
line source
/* Copyright (c) 2009-2016 Dovecot authors, see the included COPYING file */ #include "lib.h" #include "str.h" #include "base64.h" #include "message-header-encode.h" #define MIME_WRAPPER_LEN (strlen("=?utf-8?q?""?=")) #define MIME_MAX_LINE_LEN 76 #define IS_LWSP(c) \ ((c) == ' ' || (c) == '\t' || (c) == '\n') static bool input_idx_need_encoding(const unsigned char *input, unsigned int i, unsigned int len) { switch (input[i]) { case '\r': if (i+1 == len || input[i+1] != '\n') return TRUE; i++; /* fall through and verify the LF as well */ case '\n': if (i+1 == len) { /* trailing LF - we need to drop it */ return TRUE; } if (input[i+1] != '\t' && input[i+1] != ' ') { /* LF not followed by whitespace - we need to add the whitespace */ return TRUE; } break; case '\t': /* TAB doesn't need to be encoded */ break; case '=': /* <LWSP>=? - we need to check backwards a bit to see if there is LWSP (note that we don't want to return TRUE for the LWSP itself yet, so we need to do this backwards check) */ if ((i == 0 || IS_LWSP(input[i-1])) && i+2 <= len && memcmp(input + i, "=?", 2) == 0) return TRUE; break; default: /* 8bit chars */ if ((input[i] & 0x80) != 0) return TRUE; /* control chars */ if (input[i] < 32) return TRUE; break; } return FALSE; } void message_header_encode_q(const unsigned char *input, unsigned int len, string_t *output, unsigned int first_line_len) { unsigned int i, line_len_left; line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN; if (first_line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 3) { str_append(output, "\n\t"); line_len_left--; } else { line_len_left -= first_line_len; } str_append(output, "=?utf-8?q?"); for (i = 0; i < len; i++) { if (line_len_left < 3) { /* if we're not at the beginning of an UTF8 character, go backwards until we are */ while (i > 0 && (input[i] & 0xc0) == 0x80) { str_truncate(output, str_len(output)-3); i--; } str_append(output, "?=\n\t=?utf-8?q?"); line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - 1; } switch (input[i]) { case ' ': str_append_c(output, '_'); break; case '=': case '?': case '_': line_len_left -= 2; str_printfa(output, "=%02X", input[i]); break; default: if (input[i] < 32 || (input[i] & 0x80) != 0) { line_len_left -= 2; str_printfa(output, "=%02X", input[i]); } else { str_append_c(output, input[i]); } break; } line_len_left--; } str_append(output, "?="); } void message_header_encode_b(const unsigned char *input, unsigned int len, string_t *output, unsigned int first_line_len) { unsigned int line_len, line_len_left, max; line_len = first_line_len; if (line_len >= MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN) { str_append(output, "\n\t"); line_len = 1; } for (;;) { line_len_left = MIME_MAX_LINE_LEN - MIME_WRAPPER_LEN - line_len; max = MAX_BASE64_DECODED_SIZE(line_len_left); do { max--; if (max > len) max = len; else { /* all of it doesn't fit. find a character where we can split it from. */ while (max > 0 && (input[max] & 0xc0) == 0x80) max--; } } while (MAX_BASE64_ENCODED_SIZE(max) > line_len_left && max > 0); if (max > 0) { str_append(output, "=?utf-8?b?"); base64_encode(input, max, output); str_append(output, "?="); } input += max; len -= max; if (len == 0) break; str_append(output, "\n\t"); line_len = 1; } } void message_header_encode(const char *input, string_t *output) { message_header_encode_data((const void *)input, strlen(input), output); } void message_header_encode_data(const unsigned char *input, unsigned int len, string_t *output) { unsigned int i, j, first_line_len, cur_line_len, last_idx; unsigned int enc_chars, enc_len, base64_len, q_len; const unsigned char *next_line_input; unsigned int next_line_len = 0; bool use_q, cr; /* find the first word that needs encoding */ for (i = 0; i < len; i++) { if (input_idx_need_encoding(input, i, len)) break; } if (i == len) { /* no encoding necessary */ str_append_data(output, input, len); return; } /* go back to the beginning of the word so it is fully encoded */ if (input[i] != '\r' && input[i] != '\n') { while (i > 0 && !IS_LWSP(input[i-1])) i--; } /* write the prefix */ str_append_data(output, input, i); first_line_len = j = i; while (j > 0 && input[j-1] != '\n') j--; if (j != 0) first_line_len = j; input += i; len -= i; /* we'll encode data only up to the next LF, the rest is handled recursively. */ next_line_input = memchr(input, '\n', len); if (next_line_input != NULL) { if (next_line_input != input && next_line_input[-1] == '\r') next_line_input--; cur_line_len = next_line_input - input; next_line_len = len - cur_line_len; len = cur_line_len; } /* find the last word that needs encoding */ last_idx = 0; enc_chars = 0; for (i = 0; i < len; i++) { if (input_idx_need_encoding(input, i, len)) { last_idx = i + 1; enc_chars++; } } while (last_idx < len && !IS_LWSP(input[last_idx])) last_idx++; /* figure out if we should use Q or B encoding. Prefer Q if it's not too much larger. */ enc_len = last_idx; base64_len = MAX_BASE64_ENCODED_SIZE(enc_len); q_len = enc_len + enc_chars*3; use_q = q_len*2/3 <= base64_len; /* and do it */ if (enc_len == 0) ; else if (use_q) message_header_encode_q(input, enc_len, output, first_line_len); else message_header_encode_b(input, enc_len, output, first_line_len); str_append_data(output, input + last_idx, len - last_idx); if (next_line_input != NULL) { /* we're at [CR]LF */ i = 0; if (next_line_input[0] == '\r') { cr = TRUE; i++; } else { cr = FALSE; } i_assert(next_line_input[i] == '\n'); if (++i == next_line_len) return; /* drop trailing [CR]LF */ if (cr) str_append_c(output, '\r'); str_append_c(output, '\n'); if (next_line_input[i] == ' ' || next_line_input[i] == '\t') { str_append_c(output, next_line_input[i]); i++; } else { /* make it valid folding whitespace by adding a TAB */ str_append_c(output, '\t'); } message_header_encode_data(next_line_input+i, next_line_len-i, output); } }