Mercurial > dovecot > core-2.2

/* Copyright (c) 2002-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "base64.h"
#include "buffer.h"
#include "unichar.h"
#include "charset-utf8.h"
#include "quoted-printable.h"
#include "message-header-decode.h"

static size_t
message_header_decode_encoded(const unsigned char *data, size_t size,
			      buffer_t *decodebuf, size_t *charsetlen_r)
{
#define QCOUNT 3
	unsigned int num = 0;
	size_t i, start_pos[QCOUNT];

	/* data should contain "charset?encoding?text?=" */
	for (i = 0; i < size; i++) {
		if (data[i] == '?') {
			start_pos[num++] = i;
			if (num == QCOUNT)
				break;
		}
	}
	if (i == size || data[i+1] != '=') {
		/* invalid block */
		return 0;
	}

	buffer_append(decodebuf, data, start_pos[0]);
	buffer_append_c(decodebuf, '\0');
	*charsetlen_r = decodebuf->used;

	switch (data[start_pos[0]+1]) {
	case 'q':
	case 'Q':
		if (quoted_printable_q_decode(data + start_pos[1] + 1,
					      start_pos[2] - start_pos[1] - 1,
					      decodebuf) < 0) {
			/* we skipped over some invalid data */
		}
		break;
	case 'b':
	case 'B':
		if (base64_decode(data + start_pos[1] + 1,
				  start_pos[2] - start_pos[1] - 1,
				  NULL, decodebuf) < 0) {
			/* contains invalid data. show what we got so far. */
		}
		break;
	default:
		/* unknown encoding */
		return 0;
	}

	return start_pos[2] + 2;
}

static bool is_only_lwsp(const unsigned char *data, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (!(data[i] == ' ' || data[i] == '\t' ||
		      data[i] == '\r' || data[i] == '\n'))
			return FALSE;
	}
	return TRUE;
}

void message_header_decode(const unsigned char *data, size_t size,
			   message_header_decode_callback_t *callback,
			   void *context)
{
	buffer_t *decodebuf = NULL;
	size_t charsetlen = 0;
	size_t pos, start_pos, ret;

	/* =?charset?Q|B?text?= */
	start_pos = 0;
	for (pos = 0; pos + 1 < size; ) {
		if (data[pos] != '=' || data[pos+1] != '?') {
			pos++;
			continue;
		}

		/* encoded string beginning */
		if (pos != start_pos &&
		    !is_only_lwsp(data+start_pos, pos-start_pos)) {
			/* send the unencoded data so far */
			if (!callback(data + start_pos, pos - start_pos,
				      NULL, context)) {
				start_pos = size;
				break;
			}
		}

		if (decodebuf == NULL) {
			decodebuf = buffer_create_dynamic(default_pool,
							  size - pos);
		} else {
			buffer_set_used_size(decodebuf, 0);
		}

		pos += 2;
		ret = message_header_decode_encoded(data + pos, size - pos,
						    decodebuf, &charsetlen);
		if (ret == 0) {
			start_pos = pos-2;
			continue;
		}
		pos += ret;

		if (decodebuf->used > charsetlen) {
			/* decodebuf contains <charset> NUL <text> */
			if (!callback(CONST_PTR_OFFSET(decodebuf->data,
						       charsetlen),
				      decodebuf->used - charsetlen,
				      decodebuf->data, context)) {
				start_pos = size;
				break;
			}
		}

		start_pos = pos;
	}

	if (size != start_pos) {
		(void)callback(data + start_pos, size - start_pos,
			       NULL, context);
	}
	if (decodebuf != NULL)
		buffer_free(&decodebuf);
}

struct decode_utf8_context {
	buffer_t *dest;
	normalizer_func_t *normalizer;
	unsigned int changed:1;
};

static bool
decode_utf8_callback(const unsigned char *data, size_t size,
		     const char *charset, void *context)
{
	struct decode_utf8_context *ctx = context;
	struct charset_translation *t;

	if (charset == NULL || charset_is_utf8(charset)) {
		/* ASCII / UTF-8 */
		if (ctx->normalizer != NULL) {
			(void)ctx->normalizer(data, size, ctx->dest);
		} else {
			if (uni_utf8_get_valid_data(data, size, ctx->dest))
				buffer_append(ctx->dest, data, size);
		}
		return TRUE;
	}

	if (charset_to_utf8_begin(charset, ctx->normalizer, &t) < 0) {
		/* data probably still contains some valid ASCII characters.
		   append them. */
		if (uni_utf8_get_valid_data(data, size, ctx->dest))
			buffer_append(ctx->dest, data, size);
		return TRUE;
	}

	/* ignore any errors */
	(void)charset_to_utf8(t, data, &size, ctx->dest);
	charset_to_utf8_end(&t);
	return TRUE;
}

void message_header_decode_utf8(const unsigned char *data, size_t size,
				buffer_t *dest, normalizer_func_t *normalizer)
{
	struct decode_utf8_context ctx;

	memset(&ctx, 0, sizeof(ctx));
	ctx.dest = dest;
	ctx.normalizer = normalizer;
	message_header_decode(data, size, decode_utf8_callback, &ctx);
}
author	Timo Sirainen <timo.sirainen@dovecot.fi>
date	Mon, 12 Dec 2016 07:19:55 +0200
parents	0f22db71df7a
children	59437f8764c6