view src/lib-mail/message-header-search.c @ 765:553f050c8313 HEAD

Added buffer API. Point is to hide all buffer writing behind this API which verifies that nothing overflows. Much better than doing the same checks all around the code, even if it is slightly slower. Buffer reading is still mostly done directly, that isn't such a big security risk and I can't think of a reasonable API for it anyway.
author Timo Sirainen <tss@iki.fi>
date Sun, 08 Dec 2002 07:23:07 +0200
parents eae61c3f4f3c
children d826ab8c8d62
line wrap: on
line source

/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "base64.h"
#include "buffer.h"
#include "charset-utf8.h"
#include "rfc822-tokenize.h"
#include "quoted-printable.h"
#include "message-header-search.h"

#include <ctype.h>

/* State of one header search. Created by message_header_search_init(),
   fed with header data through message_header_search() and released with
   message_header_search_free(). */
struct _HeaderSearchContext {
	/* pool that ctx, key, key_charset and matches were allocated from */
	Pool pool;

	/* search key as returned by charset_to_ucase_utf8_string() */
	unsigned char *key;
	/* length reported by the conversion; a partial match that reaches
	   this many characters is a full match */
	size_t key_len;
	/* copy of the charset name given to message_header_search_init() */
	char *key_charset;

	/* partial matches currently in progress. the array has key_len
	   entries; each used entry is the index into key of the next
	   character that has to match */
	size_t *matches;
	/* number of entries in use in matches */
	ssize_t match_count;

	/* key has been found; further searches return TRUE immediately
	   until message_header_search_reset() is called */
	unsigned int found:1;
	/* last character handled by the previous message_header_search()
	   call was a '\n' */
	unsigned int last_newline:1;
	/* set while match_data() feeds already converted data back into
	   message_header_search(); disables encoded-word detection,
	   uppercasing and header folding handling */
	unsigned int submatch:1;
	/* no byte of key has the high bit (0x80) set */
	unsigned int key_ascii:1;
	/* message_header_search_init() was called with charset == NULL */
	unsigned int unknown_charset:1;
};

/* Create a search context for finding key in message headers.

   pool            - pool the context and everything it owns is allocated from
   key             - NUL-terminated search key, given in charset
   charset         - character set name of key, or NULL if not known
   unknown_charset - passed through to charset_to_ucase_utf8_string()

   Returns the new context, or NULL if the key couldn't be converted.
   Free the context with message_header_search_free().

   Temporary data is allocated from data_stack_pool. This function doesn't
   open a data stack frame of its own, so it must not call t_pop() either -
   doing so would pop a frame that belongs to the caller. */
HeaderSearchContext *
message_header_search_init(Pool pool, const char *key, const char *charset,
			   int *unknown_charset)
{
	HeaderSearchContext *ctx;
	Buffer *keybuf;
	size_t key_len;
	const char *p;

	ctx = p_new(pool, HeaderSearchContext, 1);
	ctx->pool = pool;

	/* get the key uppercased */
	keybuf = buffer_create_const_data(data_stack_pool, key, strlen(key));
	key = charset_to_ucase_utf8_string(charset, unknown_charset,
					   keybuf, &key_len);

	if (key == NULL) {
		/* invalid key. release the context instead of leaving it
		   allocated in the pool with nobody referencing it. */
		p_free(pool, ctx);
		return NULL;
	}

	ctx->key = (unsigned char *) p_strdup(pool, key);
	ctx->key_len = key_len;
	ctx->key_charset = p_strdup(pool, charset);
	ctx->unknown_charset = charset == NULL;

	/* remember whether the key is plain 7bit; message_header_search()
	   uses this to decide how to treat raw 8bit header data */
	ctx->key_ascii = TRUE;
	for (p = (const char *) ctx->key; *p != '\0'; p++) {
		if ((*p & 0x80) != 0) {
			ctx->key_ascii = FALSE;
			break;
		}
	}

	/* one slot for every possible simultaneous partial match */
	i_assert(ctx->key_len <= SSIZE_T_MAX/sizeof(size_t));
	ctx->matches = p_new(pool, size_t, ctx->key_len);
	return ctx;
}

/* Release the search context and the memory it owns back to the pool it
   was allocated from. ctx must not be used after this call. */
void message_header_search_free(HeaderSearchContext *ctx)
{
	/* grab the pool first, ctx itself is the last thing to be freed */
	Pool owner = ctx->pool;

	p_free(owner, ctx->key);
	p_free(owner, ctx->key_charset);
	p_free(owner, ctx->matches);
	p_free(owner, ctx);
}

/* Convert the contents of buffer from charset with
   charset_to_ucase_utf8_string() and search the result for the key.
   Returns the result of the search, or FALSE if the conversion failed. */
static int match_data(const Buffer *buffer, const char *charset,
		      HeaderSearchContext *ctx)
{
	const char *converted;
	size_t converted_size;
	int matched;

	if (ctx->unknown_charset) {
		/* the key's charset isn't known, so the best we can do is
		   to compare the data as if it used the same charset */
		charset = NULL;
	} else if (charset != NULL && strcasecmp(charset, "x-unknown") == 0) {
		/* data claims not to know its charset. the key has already
		   been converted to utf-8, so charset = NULL wouldn't
		   compare like with like - use the key's charset instead */
		charset = ctx->key_charset;
	}

	converted = charset_to_ucase_utf8_string(charset, NULL, buffer,
						 &converted_size);
	if (converted == NULL) {
		/* charset not known, or the data is broken */
		return FALSE;
	}

	/* submatch tells message_header_search() that the data is already
	   in its final form */
	ctx->submatch = TRUE;
	matched = message_header_search(converted, converted_size, ctx);
	ctx->submatch = FALSE;

	return matched;
}

/* Split the "charset?encoding?text?=" string that begins at the start of
   buffer's data.

   On success returns TRUE with:
     *charset  - charset name, copied with t_strndup()
     *encoding - "Q" or "B" (input is accepted in either case)
     *last_pos - offset of the terminating '=' from the start of the data
                 the buffer had when this function was called
     buffer    - start position and limit changed to cover only the text

   Returns FALSE if the data isn't in that form. buffer and the output
   parameters are then left unmodified, except that *charset may already
   have been set. */
static int split_encoded(Buffer *buffer, size_t *last_pos,
			 const char **charset, const char **encoding)
{
	const char *p;
	size_t size, pos, textpos;

	p = buffer_get_data(buffer, &size);

	/* get charset */
	for (pos = 0; pos < size && p[pos] != '?'; pos++) ;
	/* the loop ends either at a '?' or at the end of data. check which
	   one by position - p[size] is outside the data and must not be
	   read. */
	if (pos == size) return FALSE;
	*charset = t_strndup(p, pos);

	/* get encoding */
	pos++;
	if (pos+2 >= size || p[pos+1] != '?')
		return FALSE;

	if (p[pos] == 'Q' || p[pos] == 'q')
		*encoding = "Q";
	else if (p[pos] == 'B' || p[pos] == 'b')
		*encoding = "B";
	else
		return FALSE;

	/* get text */
	pos += 2;
	textpos = pos;
	for (; pos < size && p[pos] != '?'; pos++) ;
	/* pos+1 >= size also covers the case where no '?' was found
	   (pos == size), so nothing is read past the end of data */
	if (pos+1 >= size || p[pos+1] != '=') return FALSE;

	/* the limit is set while the start position is still the original
	   one, only then the start is moved to the text */
	buffer_set_limit(buffer, pos);
	buffer_set_start_pos(buffer, textpos + buffer_get_start_pos(buffer));
	*last_pos = pos+1;

	return TRUE;
}

/* Decode the "charset?encoding?text?=" string at the start of buffer's data
   and search the decoded text for the key. *last_pos is set by
   split_encoded() whenever the string could be split. Returns the search
   result; if the string can't be split or base64 decoding fails, the
   partial matches in ctx are dropped and FALSE is returned. */
static int match_encoded(Buffer *buffer, size_t *last_pos,
			 HeaderSearchContext *ctx)
{
	const char *charset, *encoding, *payload;
	Buffer *decoded;
	size_t payload_size;

	/* separate the charset, the encoding and the text from each other */
	if (!split_encoded(buffer, last_pos, &charset, &encoding)) {
		ctx->match_count = 0;
		return FALSE;
	}

	/* split_encoded() left only the text visible in the buffer */
	payload = buffer_get_data(buffer, &payload_size);
	decoded = buffer_create_static_hard(data_stack_pool, payload_size);

	switch (*encoding) {
	case 'Q':
		quoted_printable_decode(payload, payload_size, NULL, decoded);
		break;
	default:
		/* split_encoded() only gives "Q" or "B" */
		if (base64_decode(payload, payload_size, NULL, decoded) < 0) {
			/* broken base64 data */
			ctx->match_count = 0;
			return FALSE;
		}
		break;
	}

	return match_data(decoded, charset, ctx);
}

/* Search the next block of header data for the key in ctx.

   The partial match state is kept in ctx, so a header can be given in
   several consecutive blocks. "=?charset?encoding?text?=" strings are
   decoded before matching, and line breaks followed by whitespace are
   treated as header continuation.

   Returns TRUE once the key has been found. After that all calls return
   TRUE without looking at the data until message_header_search_reset()
   is called. */
int message_header_search(const unsigned char *header_block, size_t size,
			  HeaderSearchContext *ctx)
{
	Buffer *buf;
	ssize_t i;
	size_t pos, subpos;
	unsigned char chr;
	int last_newline, ret;

	if (ctx->found)
		return TRUE;

	t_push();
	buf = buffer_create_const_data(data_stack_pool, header_block, size);

	last_newline = ctx->last_newline;
	for (pos = 0; pos < size; pos++) {
		chr = header_block[pos];

		if (chr == '=' && pos+1 < size &&
		    header_block[pos+1] == '?' && !ctx->submatch) {
			/* encoded string. read it. */
			buffer_set_start_pos(buf, pos+2);

			/* subpos is written only if the string could be
			   split, and is never 0 in that case. preset it so
			   that the failure can be detected instead of
			   reading an uninitialized value. */
			subpos = 0;

			t_push();
			ret = match_encoded(buf, &subpos, ctx);
			t_pop();

			if (ret) {
				ctx->found = TRUE;
				break;
			}

			buffer_set_start_pos(buf, 0);
			buffer_set_limit(buf, (size_t)-1);

			if (subpos != 0) {
				/* subpos is the offset of the terminating
				   '=' counted from pos+2. move onto that
				   '=' so that the loop continues after the
				   encoded string rather than scanning its
				   raw tail as plain text. */
				pos += subpos + 2;
			}
			/* else it wasn't a valid encoded string. only the
			   '=' is skipped. */
			last_newline = FALSE;
			continue;
		}

		if (!ctx->submatch) {
			if ((chr & 0x80) == 0)
				chr = i_toupper(chr);
			else if (!ctx->key_ascii && !ctx->unknown_charset) {
				/* we have non-ascii in header and key contains
				   non-ascii characters. treat the rest of the
				   header as encoded with the key's charset */
				t_push();
				ctx->found = match_data(buf, ctx->key_charset,
							ctx);
				t_pop();
				break;
			}
		}

		if (last_newline && !ctx->submatch) {
			if (!IS_LWSP(chr)) {
				/* not a long header, reset matches */
				ctx->match_count = 0;
			}
			chr = ' ';
		}
		last_newline = chr == '\n';

		if (chr == '\r' || chr == '\n')
			continue;

		/* go through the partial matches from last to first, so
		   that removing one doesn't disturb the ones still to be
		   checked */
		for (i = ctx->match_count-1; i >= 0; i--) {
			if (ctx->key[ctx->matches[i]] == chr) {
				if (++ctx->matches[i] == ctx->key_len) {
					/* full match */
					ctx->found = TRUE;
					t_pop();
					return TRUE;
				}
			} else {
				/* non-match */
				ctx->match_count--;
				if (i != ctx->match_count) {
					/* memmove() counts bytes, not
					   array elements */
					memmove(ctx->matches + i,
						ctx->matches + i + 1,
						(size_t)(ctx->match_count - i) *
						sizeof(*ctx->matches));
				}
			}
		}

		if (chr == ctx->key[0]) {
			if (ctx->key_len == 1) {
				/* only one character in search key */
				ctx->found = TRUE;
				break;
			}
			/* a new partial match begins, its next character to
			   compare is key[1] */
			i_assert((size_t)ctx->match_count < ctx->key_len);
			ctx->matches[ctx->match_count++] = 1;
		}
	}
	t_pop();

	ctx->last_newline = last_newline;
	return ctx->found;
}

/* Make the context ready for a new search with the same key: forget the
   found state and all partial matches. last_newline is left as it is. */
void message_header_search_reset(HeaderSearchContext *ctx)
{
	ctx->found = FALSE;
	ctx->match_count = 0;
}