changeset 608:debb8468514e HEAD

SEARCH CHARSET now works properly with message bodies, and in general body searching works more correctly by decoding base64/qp data. Non-text MIME parts are currently not included in the search; that could be made optional. Also, the body is parsed separately for each keyword, which could be optimized. Changed base64_decode() behaviour so that it can accept non-base64 data as well, i.e. line feeds etc.
author Timo Sirainen <tss@iki.fi>
date Wed, 13 Nov 2002 13:08:18 +0200
parents c857ebe48596
children 5470c0cb13a7
files src/lib-charset/charset-ascii.c src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.h src/lib-mail/Makefile.am src/lib-mail/message-body-search.c src/lib-mail/message-body-search.h src/lib-mail/message-header-search.c src/lib-mail/message-header-search.h src/lib-mail/quoted-printable.c src/lib-mail/quoted-printable.h src/lib-storage/index/index-search.c src/lib/base64.c src/lib/base64.h src/login/client-authenticate.c
diffstat 14 files changed, 738 insertions(+), 147 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-charset/charset-ascii.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-charset/charset-ascii.c	Wed Nov 13 13:08:18 2002 +0200
@@ -5,12 +5,63 @@
 
 #ifndef HAVE_ICONV_H
 
-const char *charset_to_ucase_utf8(const unsigned char *data,
-				  size_t *size __attr_unused__,
-				  const char *charset, int *unknown_charset)
+#include <ctype.h>
+
+struct _CharsetTranslation {
+	int dummy;
+};
+
+static CharsetTranslation ascii_translation;
+
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+					  int *unknown_charset)
+{
+	int is_ascii;
+
+	/* Without iconv there is no support for non-ascii charsets: only
+	   "us-ascii" and "ascii" (compared case-insensitively) are accepted.
+	   *unknown_charset, when given, is set to TRUE for anything else. */
+	is_ascii = strcasecmp(charset, "us-ascii") == 0 ||
+		strcasecmp(charset, "ascii") == 0;
+
+	if (unknown_charset != NULL)
+		*unknown_charset = is_ascii ? FALSE : TRUE;
+	/* all ascii translations share the same static dummy object */
+	return is_ascii ? &ascii_translation : NULL;
+}
+
+void charset_to_utf8_end(CharsetTranslation *t __attr_unused__)
 {
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
-		return str_ucase(t_strdup_noconst(data));
+}
+
+void charset_to_utf8_reset(CharsetTranslation *t __attr_unused__)
+{
+}
+
+int charset_to_ucase_utf8(CharsetTranslation *t __attr_unused__,
+			  const unsigned char **inbuf, size_t *insize,
+			  unsigned char *outbuf, size_t *outsize)
+{
+	size_t max_size, i;
+
+	/* uppercase what fits in outbuf; *inbuf/*insize describe the rest */
+	max_size = I_MIN(*insize, *outsize);
+	for (i = 0; i < max_size; i++)
+		outbuf[i] = i_toupper((*inbuf)[i]);
+	*inbuf += max_size;
+	*insize -= max_size;
+	*outsize = max_size;
+	return TRUE;
+}
+
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+			     const unsigned char *buf,
+			     size_t *size __attr_unused__)
+{
+	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
+	    strcasecmp(charset, "ascii") == 0)
+		return str_ucase(t_strdup_noconst(buf));
 
 	if (unknown_charset != NULL)
 		*unknown_charset = TRUE;
--- a/src/lib-charset/charset-iconv.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-charset/charset-iconv.c	Wed Nov 13 13:08:18 2002 +0200
@@ -6,16 +6,102 @@
 #ifdef HAVE_ICONV_H
 
 #include <iconv.h>
+#include <ctype.h>
 
-const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
-				  const char *charset, int *unknown_charset)
+struct _CharsetTranslation {
+	iconv_t cd;
+};
+
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+					  int *unknown_charset)
+{
+	CharsetTranslation *trans;
+	iconv_t conv = NULL;
+	int is_ascii;
+
+	if (unknown_charset != NULL)
+		*unknown_charset = FALSE;
+
+	/* ascii needs no actual translation, it's marked with NULL cd */
+	is_ascii = strcasecmp(charset, "us-ascii") == 0 ||
+		strcasecmp(charset, "ascii") == 0;
+	if (!is_ascii) {
+		conv = iconv_open("UTF8", charset);
+		if (conv == (iconv_t)-1) {
+			if (unknown_charset != NULL)
+				*unknown_charset = TRUE;
+			return NULL;
+		}
+	}
+
+	trans = i_new(CharsetTranslation, 1);
+	trans->cd = conv;
+	return trans;
+}
+
+void charset_to_utf8_end(CharsetTranslation *t)
+{
+	if (t->cd != NULL) /* NULL cd = ascii, nothing was opened with iconv */
+		iconv_close(t->cd);
+	i_free(t);
+}
+
+void charset_to_utf8_reset(CharsetTranslation *t)
+{
+	if (t->cd != NULL) /* NULL cd = ascii, there is no iconv state */
+		(void)iconv(t->cd, NULL, NULL, NULL, NULL);
+}
+
+int charset_to_ucase_utf8(CharsetTranslation *t,
+			  const unsigned char **inbuf, size_t *insize,
+			  unsigned char *outbuf, size_t *outsize)
+{
+	char *ic_inbuf, *ic_outbuf;
+	size_t outleft, max_size, i;
+
+	if (t->cd == NULL) {
+		/* ascii - uppercase what fits in outbuf, rest stays in inbuf */
+		max_size = I_MIN(*insize, *outsize);
+		for (i = 0; i < max_size; i++)
+			outbuf[i] = i_toupper((*inbuf)[i]);
+		*inbuf += max_size;
+		*insize -= max_size;
+		*outsize = max_size;
+		return TRUE;
+	}
+
+	ic_inbuf = (char *) *inbuf;
+	ic_outbuf = (char *) outbuf;
+	outleft = *outsize;
+
+	if (iconv(t->cd, &ic_inbuf, insize,
+		  &ic_outbuf, &outleft) == (size_t)-1) {
+		if (errno != E2BIG && errno != EINVAL) {
+			/* should be EILSEQ - invalid input */
+			return FALSE;
+		}
+	}
+
+	/* E2BIG/EINVAL aren't errors, caller resumes from updated inbuf */
+	*inbuf = (const unsigned char *) ic_inbuf;
+	*outsize -= outleft;
+
+	for (i = 0; i < *outsize; i++)
+		outbuf[i] = i_toupper(outbuf[i]);
+	return TRUE;
+}
+
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+			     const unsigned char *buf, size_t *size)
 {
 	iconv_t cd;
 	char *inbuf, *outbuf, *outpos;
 	size_t inleft, outleft, outsize, pos;
 
-	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
-		return str_ucase(t_strdup_noconst(data));
+	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0 ||
+	    strcasecmp(charset, "ascii") == 0)
+		return str_ucase(t_strdup_noconst(buf));
 
 	cd = iconv_open("UTF8", charset);
 	if (cd == (iconv_t)-1) {
@@ -27,7 +113,7 @@
 	if (unknown_charset != NULL)
 		*unknown_charset = FALSE;
 
-	inbuf = (char *) data;
+	inbuf = (char *) buf;
 	inleft = *size;
 
 	outsize = outleft = *size * 2;
--- a/src/lib-charset/charset-utf8.h	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-charset/charset-utf8.h	Wed Nov 13 13:08:18 2002 +0200
@@ -1,7 +1,28 @@
 #ifndef __CHARSET_UTF8_H
 #define __CHARSET_UTF8_H
 
-const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
-				  const char *charset, int *unknown_charset);
+typedef struct _CharsetTranslation CharsetTranslation;
+
+/* Begin translation to UTF-8. */
+CharsetTranslation *charset_to_utf8_begin(const char *charset,
+					  int *unknown_charset);
+
+void charset_to_utf8_end(CharsetTranslation *t);
+
+void charset_to_utf8_reset(CharsetTranslation *t);
+
+/* Convert inbuf to uppercased UTF-8. *inbuf and *insize are updated to point
+   to the data that was not written to outbuf, either because inbuf ended with
+   an incomplete character sequence or because outbuf got full. *outsize is set
+   to the number of bytes written. Returns TRUE unless conversion failed. */
+int charset_to_ucase_utf8(CharsetTranslation *t,
+			  const unsigned char **inbuf, size_t *insize,
+			  unsigned char *outbuf, size_t *outsize);
+
+/* Simple wrapper for above functions. size is updated to strlen() of
+   returned UTF-8 string. */
+const char *
+charset_to_ucase_utf8_string(const char *charset, int *unknown_charset,
+			     const unsigned char *buf, size_t *size);
 
 #endif
--- a/src/lib-mail/Makefile.am	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-mail/Makefile.am	Wed Nov 13 13:08:18 2002 +0200
@@ -5,6 +5,7 @@
 	-I$(top_srcdir)/src/lib-charset
 
 libmail_a_SOURCES = \
+	message-body-search.c \
 	message-content-parser.c \
 	message-header-search.c \
 	message-parser.c \
@@ -13,9 +14,11 @@
 	message-size.c \
 	rfc822-address.c \
 	rfc822-date.c \
-	rfc822-tokenize.c
+	rfc822-tokenize.c \
+	quoted-printable.c
 
 noinst_HEADERS = \
+	message-body-search.h \
 	message-content-parser.h \
 	message-header-search.h \
 	message-parser.h \
@@ -24,4 +27,5 @@
 	message-size.h \
 	rfc822-address.h \
 	rfc822-date.h \
-	rfc822-tokenize.h
+	rfc822-tokenize.h \
+	quoted-printable.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-body-search.c	Wed Nov 13 13:08:18 2002 +0200
@@ -0,0 +1,400 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+#include "base64.h"
+#include "ibuffer.h"
+#include "charset-utf8.h"
+#include "rfc822-tokenize.h"
+#include "quoted-printable.h"
+#include "message-parser.h"
+#include "message-content-parser.h"
+#include "message-header-search.h"
+#include "message-body-search.h"
+
+#define DECODE_BLOCK_SIZE 8192
+
+typedef struct {
+	Pool pool;
+
+	const char *key;
+	size_t key_len;
+
+	const char *charset;
+	unsigned int unknown_charset:1;
+} BodySearchContext;
+
+typedef struct {
+	BodySearchContext *body_ctx;
+
+	HeaderSearchContext *hdr_search_ctx;
+	CharsetTranslation *translation;
+
+	unsigned char decode_buf[DECODE_BLOCK_SIZE];
+	size_t decode_buf_used;
+
+	size_t *matches;
+	ssize_t match_count;
+
+	const char *content_type;
+	const char *content_charset;
+
+	unsigned int content_qp:1;
+	unsigned int content_base64:1;
+	unsigned int content_unknown:1;
+	unsigned int content_type_text:1; /* text/any or message/any */
+	unsigned int found:1;
+} PartSearchContext;
+
+static void parse_content_type(const Rfc822Token *tokens, int count,
+			       void *context)
+{
+	PartSearchContext *ctx = context;
+	/* content_type starts out as NULL, only the first value is used */
+	if (ctx->content_type == NULL && tokens[0].token == 'A') {
+		ctx->content_type = rfc822_tokens_get_value(tokens, count);
+		ctx->content_type_text =
+			strncasecmp(ctx->content_type, "text/", 5) == 0 ||
+			strncasecmp(ctx->content_type, "message/", 8) == 0;
+	}
+}
+
+static void parse_content_type_param(const Rfc822Token *name,
+				     const Rfc822Token *value,
+				     int value_count, void *context)
+{
+	PartSearchContext *part = context;
+
+	/* only the first charset parameter is remembered */
+	if (part->content_charset != NULL)
+		return;
+	if (name->len == 7 && strncasecmp(name->ptr, "charset", 7) == 0) {
+		part->content_charset =
+			rfc822_tokens_get_value(value, value_count);
+	}
+}
+
+static void parse_content_encoding(const Rfc822Token *tokens,
+				   int count __attr_unused__, void *context)
+{
+	PartSearchContext *part = context;
+	const Rfc822Token *tok = &tokens[0];
+
+	if (tok->token != 'A')
+		return;
+
+	if (tok->len == 6 && strncasecmp(tok->ptr, "base64", 6) == 0) {
+		/* decoded with base64_decode() before searching */
+		part->content_base64 = TRUE;
+		return;
+	}
+
+	if (tok->len == 16 &&
+	    strncasecmp(tok->ptr, "quoted-printable", 16) == 0) {
+		/* decoded with quoted_printable_decode() before searching */
+		part->content_qp = TRUE;
+		return;
+	}
+
+	/* 7bit, 8bit and binary need no decoding */
+	if (tok->len == 4 && (strncasecmp(tok->ptr, "7bit", 4) == 0 ||
+			      strncasecmp(tok->ptr, "8bit", 4) == 0))
+		return;
+	if (tok->len == 6 && strncasecmp(tok->ptr, "binary", 6) == 0)
+		return;
+	/* anything else is something we can't decode */
+	part->content_unknown = TRUE;
+}
+
+static void header_find(MessagePart *part __attr_unused__,
+			const char *name, size_t name_len,
+			const char *value, size_t value_len, void *context)
+{
+	PartSearchContext *ctx = context;
+	/* called for each header: search the key, note content type/encoding */
+	if (ctx->found)
+		return;
+
+	ctx->found = message_header_search(value, &value_len,
+					   ctx->hdr_search_ctx);
+	/* NOTE(review): value_len was passed by pointer above - may be changed */
+	if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
+		(void)message_content_parse_header(t_strndup(value, value_len),
+						   parse_content_type,
+						   parse_content_type_param,
+						   ctx);
+	} else if (name_len == 25 &&
+		   strncasecmp(name, "Content-Transfer-Encoding", 25) == 0) {
+		(void)message_content_parse_header(t_strndup(value, value_len),
+						   parse_content_encoding,
+						   NULL, ctx);
+	}
+}
+
+static int message_search_header(PartSearchContext *ctx, IBuffer *inbuf)
+{
+	ctx->hdr_search_ctx = message_header_search_init(data_stack_pool,
+							 ctx->body_ctx->key,
+							 ctx->body_ctx->charset,
+							 NULL);
+	/* NOTE(review): body_ctx->key is already translated - converted twice? */
+	/* we default to text content-type */
+	ctx->content_type_text = TRUE;
+	message_parse_header(NULL, inbuf, NULL, header_find, ctx);
+	/* header_find() sets found if the key was in some header */
+	return ctx->found;
+}
+
+static int message_search_decoded_block(PartSearchContext *ctx,
+					const unsigned char *data, size_t size)
+{
+	const unsigned char *p, *end, *key;
+	size_t key_len;
+	ssize_t i;
+	int found;
+	/* matches[] = number of key bytes matched by each pending partial match */
+	key = (const unsigned char *) ctx->body_ctx->key;
+	key_len = ctx->body_ctx->key_len;
+
+	end = data + size; found = 0;
+	for (p = data; p != end; p++) {
+		for (i = ctx->match_count-1; i >= 0; i--) {
+			if (key[ctx->matches[i]] == *p) {
+				if (++ctx->matches[i] == key_len) {
+					/* full match */
+					p++;
+					found = TRUE;
+					break;
+				}
+			} else {
+				/* non-match - remove it, sizes in bytes */
+				ctx->match_count--;
+				if (i != ctx->match_count)
+					memmove(ctx->matches + i,
+						ctx->matches + i + 1,
+						sizeof(size_t) *
+						(ctx->match_count - i));
+			}
+		}
+
+		if (found)
+			break;
+
+		if (*p == key[0]) {
+			if (key_len == 1) {
+				/* only one character in search key */
+				p++;
+				found = 1;
+				break;
+			}
+			i_assert((size_t)ctx->match_count < key_len);
+			ctx->matches[ctx->match_count++] = 1;
+		}
+	}
+
+	return found;
+}
+
+static int message_search_body_block(PartSearchContext *ctx,
+				     const unsigned char *data, size_t size)
+{
+	const unsigned char *inbuf;
+	unsigned char outbuf[DECODE_BLOCK_SIZE];
+	size_t inbuf_size, outbuf_size, max_size;
+	/* returns 1 if key was found, 0 if not, -1 if translation failed */
+	if (ctx->body_ctx->unknown_charset || ctx->translation == NULL)
+		return message_search_decoded_block(ctx, data, size);
+
+	while (size > 0) {
+		if (ctx->decode_buf_used == 0) {
+			inbuf = data;
+			inbuf_size = I_MIN(size, sizeof(ctx->decode_buf));
+
+			data += inbuf_size;
+			size -= inbuf_size;
+		} else {
+			/* some characters already in buffer, ie. last
+			   conversion contained partial data */
+			max_size = sizeof(ctx->decode_buf) -
+				ctx->decode_buf_used;
+			if (max_size > size)
+				max_size = size;
+
+			memcpy(ctx->decode_buf + ctx->decode_buf_used,
+			       data, max_size);
+			ctx->decode_buf_used += max_size;
+
+			inbuf = ctx->decode_buf;
+			inbuf_size = ctx->decode_buf_used;
+
+			data += max_size;
+			size -= max_size;
+		}
+
+		outbuf_size = sizeof(outbuf);
+		if (!charset_to_ucase_utf8(ctx->translation,
+					   &inbuf, &inbuf_size,
+					   outbuf, &outbuf_size)) {
+			/* something failed */
+			return -1;
+		}
+
+		if (message_search_decoded_block(ctx, outbuf, outbuf_size))
+			return 1;
+
+		/* save any partial input. the count must be updated even
+		   when it's zero, otherwise already converted data is reused */
+		if (inbuf_size > 0)
+			memmove(ctx->decode_buf, inbuf, inbuf_size);
+		ctx->decode_buf_used = inbuf_size;
+	}
+
+	return 0;
+}
+
+static int message_search_body(PartSearchContext *ctx, IBuffer *inbuf,
+			       MessagePart *part)
+{
+	const unsigned char *data, *decoded;
+	unsigned char *decodebuf;
+	size_t data_size, decoded_size, pos;
+	uoff_t old_limit;
+	ssize_t ret;
+	int found;
+	/* search the key from part's body, decoding base64/qp in blocks */
+	if (ctx->content_unknown) {
+		/* unknown content-encoding-type, ignore */
+		return FALSE;
+	}
+
+	if (!ctx->content_type_text) {
+		/* non-text content, ignore - FIXME: should be configurable? */
+		return FALSE;
+	}
+
+	ctx->translation = charset_to_utf8_begin(ctx->content_charset != NULL ?
+						 ctx->content_charset : "ascii",
+						 NULL);
+
+	ctx->match_count = 0;
+	ctx->matches = t_malloc(sizeof(size_t) * ctx->body_ctx->key_len);
+
+	i_buffer_skip(inbuf, part->physical_pos +
+		      part->header_size.physical_size - inbuf->v_offset);
+
+	old_limit = inbuf->v_limit;
+	i_buffer_set_read_limit(inbuf, inbuf->v_offset +
+				part->body_size.physical_size);
+
+	found = FALSE; pos = 0;
+	while (i_buffer_read_data(inbuf, &data, &data_size, pos) > 0) {
+		/* limit the size of t_malloc()s */
+		if (data_size > DECODE_BLOCK_SIZE)
+			data_size = DECODE_BLOCK_SIZE;
+		pos = data_size;
+
+		t_push();
+		if (ctx->content_qp) {
+			decoded = decodebuf = t_malloc(data_size);
+			decoded_size = quoted_printable_decode(data, &data_size,
+							       decodebuf);
+		} else if (ctx->content_base64) {
+			decoded_size = MAX_BASE64_DECODED_SIZE(data_size);
+			decoded = decodebuf = t_malloc(decoded_size);
+			ret = base64_decode(data, &data_size, decodebuf);
+			/* search only the bytes that were really decoded */
+			decoded_size = ret < 0 ? 0 : (size_t)ret;
+		} else {
+			decoded = data;
+			decoded_size = data_size;
+		}
+
+		ret = message_search_body_block(ctx, decoded, decoded_size);
+		if (ret != 0) {
+			t_pop();
+			found = ret > 0;
+			break;
+		}
+
+		t_pop();
+		i_buffer_skip(inbuf, data_size);
+		pos -= data_size;
+	}
+
+	i_buffer_set_read_limit(inbuf, old_limit);
+
+	if (ctx->translation != NULL)
+		charset_to_utf8_end(ctx->translation);
+	return found;
+}
+
+static int message_body_search_init(BodySearchContext *ctx, const char *key,
+				    const char *charset, int *unknown_charset)
+{
+	const char *ucase_key;
+	size_t ucase_len = strlen(key);
+
+	memset(ctx, 0, sizeof(*ctx));
+	/* the search key is matched as uppercased UTF-8 */
+	ucase_key = charset_to_ucase_utf8_string(charset, unknown_charset,
+						 (const unsigned char *) key,
+						 &ucase_len);
+	if (ucase_key == NULL)
+		return FALSE;
+
+	/* matches[] array of this many size_t's is allocated later */
+	i_assert(ucase_len <= SSIZE_T_MAX/sizeof(size_t));
+
+	ctx->unknown_charset = charset == NULL;
+	ctx->charset = charset;
+	ctx->key_len = ucase_len;
+	ctx->key = ucase_key;
+	return TRUE;
+}
+
+static int message_body_search_ctx(BodySearchContext *ctx, IBuffer *inbuf,
+				   MessagePart *part)
+{
+	PartSearchContext part_ctx;
+	int found = FALSE;
+
+	/* walk through sibling parts until the key is found */
+	for (; part != NULL && !found; part = part->next) {
+		i_assert(inbuf->v_offset <= part->physical_pos);
+
+		i_buffer_skip(inbuf, part->physical_pos - inbuf->v_offset);
+
+		memset(&part_ctx, 0, sizeof(part_ctx));
+		part_ctx.body_ctx = ctx;
+
+		t_push();
+
+		if (message_search_header(&part_ctx, inbuf)) {
+			/* key was in the part's headers */
+			found = TRUE;
+		} else if (part->children == NULL) {
+			/* leaf part, search its body */
+			if (message_search_body(&part_ctx, inbuf, part))
+				found = TRUE;
+		} else {
+			/* multipart/xxx or message/rfc822 */
+			if (message_body_search_ctx(ctx, inbuf, part->children))
+				found = TRUE;
+		}
+
+		t_pop();
+	}
+
+	return found;
+}
+
+int message_body_search(const char *key, const char *charset,
+			int *unknown_charset, IBuffer *inbuf,
+			MessagePart *part)
+{
+	BodySearchContext ctx;
+
+	/* init fails if the charset is unknown or the key is invalid in it */
+	if (message_body_search_init(&ctx, key, charset, unknown_charset))
+		return message_body_search_ctx(&ctx, inbuf, part);
+	return -1;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-body-search.h	Wed Nov 13 13:08:18 2002 +0200
@@ -0,0 +1,12 @@
+#ifndef __MESSAGE_BODY_SEARCH_H
+#define __MESSAGE_BODY_SEARCH_H
+
+/* Returns 1 if key is found from input buffer, 0 if not and -1 if error.
+   There are two possible errors: either the charset is unknown or the key
+   is invalid. If charset is NULL, the key isn't assumed to be in any
+   specific charset but is compared to message data without any translation. */
+int message_body_search(const char *key, const char *charset,
+			int *unknown_charset, IBuffer *inbuf,
+			MessagePart *part);
+
+#endif
--- a/src/lib-mail/message-header-search.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-mail/message-header-search.c	Wed Nov 13 13:08:18 2002 +0200
@@ -2,15 +2,17 @@
 
 #include "lib.h"
 #include "base64.h"
-#include "hex-binary.h"
 #include "charset-utf8.h"
 #include "rfc822-tokenize.h"
+#include "quoted-printable.h"
 #include "message-header-search.h"
 
 #include <ctype.h>
 
 struct _HeaderSearchContext {
-	const unsigned char *key;
+	Pool pool;
+
+	unsigned char *key;
 	size_t key_len;
 
 	size_t *matches; /* size of strlen(key) */
@@ -30,56 +32,33 @@
 	size_t size;
 
 	ctx = p_new(pool, HeaderSearchContext, 1);
+	ctx->pool = pool;
 
 	/* get the key uppercased */
 	size = strlen(key);
-	ctx->key = charset_to_ucase_utf8((const unsigned char *) key, &size,
-					 charset, unknown_charset);
-	if (ctx->key == NULL)
+	key = charset_to_ucase_utf8_string(charset, unknown_charset,
+					   (const unsigned char *) key, &size);
+	if (key == NULL)
 		return NULL;
 
-	ctx->key = p_strdup(pool, ctx->key);
+	i_assert(size <= SSIZE_T_MAX/sizeof(size_t));
+
+	ctx->key = p_strdup(pool, key);
 	ctx->key_len = size;
 	ctx->unknown_charset = charset == NULL;
 
 	ctx->matches = p_malloc(pool, sizeof(size_t) * ctx->key_len);
-	i_assert(ctx->key_len <= SSIZE_T_MAX);
 	return ctx;
 }
 
-static size_t quoted_printable_decode(const unsigned char *src, size_t size,
-				      unsigned char *dest)
+void message_header_search_free(HeaderSearchContext *ctx)
 {
-	const unsigned char *end;
-	unsigned char *dest_start;
-	char hexbuf[3];
-
-	hexbuf[2] = '\0';
-
-	dest_start = dest;
-	end = src + size;
+	Pool pool;
 
-	for (; src != end; src++) {
-		if (*src == '_') {
-			*dest++ = ' ';
-			continue;
-		}
-
-		if (*src == '=' && src+2 < end) {
-			hexbuf[0] = src[1];
-			hexbuf[1] = src[2];
-
-			if (hex_to_binary(hexbuf, dest) == 1) {
-				dest++;
-				src += 2;
-				continue;
-			}
-		}
-
-		*dest++ = *src;
-	}
-
-	return (size_t) (dest - dest_start);
+	pool = ctx->pool;
+	p_free(pool, ctx->key);
+	p_free(pool, ctx->matches);
+	p_free(pool, ctx);
 }
 
 static int match_data(const unsigned char *data, size_t size,
@@ -93,8 +72,8 @@
 		charset = NULL;
 	}
 
-	data = (const unsigned char *) charset_to_ucase_utf8(data, &size,
-							     charset, NULL);
+	data = (const unsigned char *)
+		charset_to_ucase_utf8_string(charset, NULL, data, &size);
 	if (data == NULL) {
 		/* unknown character set, or invalid data */
 		return FALSE;
@@ -113,7 +92,7 @@
 	const unsigned char *p, *encoding, *text, *new_end;
 	const char *charset;
 	unsigned char *buf;
-	ssize_t size;
+	ssize_t size, buf_size;
 	int ok, ret;
 
 	/* first split the string =?charset?encoding?text?= */
@@ -154,12 +133,14 @@
 		t_push();
 
 		size = (ssize_t) (end - text);
-		buf = t_malloc(size);
+
+		buf_size = size;
+		buf = t_malloc(buf_size);
 
 		if (*encoding == 'Q')
-			size = quoted_printable_decode(text, size, buf);
+			size = quoted_printable_decode(text, &buf_size, buf);
 		else
-			size = base64_decode(text, size, buf);
+			size = base64_decode(text, &buf_size, buf);
 
 		if (size >= 0) {
 			/* non-corrupted encoding */
--- a/src/lib-mail/message-header-search.h	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-mail/message-header-search.h	Wed Nov 13 13:08:18 2002 +0200
@@ -3,12 +3,15 @@
 
 typedef struct _HeaderSearchContext HeaderSearchContext;
 
-/* Initialize new search. Allocates memory from data stack. Returns NULL
-   if charset is unknown or key is not valid in specified charset. */
+/* Initialize new search. Returns NULL if charset is unknown or key is not
+   valid in specified charset. */
 HeaderSearchContext *
 message_header_search_init(Pool pool, const char *key, const char *charset,
 			   int *unknown_charset);
 
+/* Free search context. Not needed if you just destroy the pool. */
+void message_header_search_free(HeaderSearchContext *ctx);
+
 /* Returns TRUE if key is found from header. This function may be called
    multiple times with partial header blocks, but the blocks must contain only
    full lines so RFC2047 parsing can be done. *header_size is updated to
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/quoted-printable.c	Wed Nov 13 13:08:18 2002 +0200
@@ -0,0 +1,44 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+#include "hex-binary.h"
+#include "quoted-printable.h"
+
+size_t quoted_printable_decode(const unsigned char *src, size_t *size,
+			       unsigned char *dest)
+{
+	const unsigned char *src_end = src + *size;
+	unsigned char *out = dest;
+	char hex[3];
+
+	hex[2] = '\0';
+
+	while (src != src_end) {
+		if (*src == '=') {
+			/* "=XX" needs two more bytes. if they aren't here
+			   yet, stop and let caller pass the rest later */
+			if (src+2 >= src_end)
+				break;
+
+			hex[0] = src[1];
+			hex[1] = src[2];
+			if (hex_to_binary(hex, out) == 1) {
+				out++;
+				src += 3;
+				continue;
+			}
+			/* invalid hex, '=' is copied as is */
+			*out++ = *src++;
+		} else if (*src == '_') {
+			/* underscore is decoded as space */
+			*out++ = ' ';
+			src++;
+		} else {
+			*out++ = *src++;
+		}
+	}
+
+	/* report only the consumed input back to caller */
+	*size -= (src_end - src);
+	return (size_t) (out - dest);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/quoted-printable.h	Wed Nov 13 13:08:18 2002 +0200
@@ -0,0 +1,14 @@
+#ifndef __QUOTED_PRINTABLE_H
+#define __QUOTED_PRINTABLE_H
+
+/* Translates quoted printable data into binary. dest must be at least the
+   size of src, and may be same as src. Returns size of the binary data.
+   Decoding errors are ignored.
+
+   This function may be called multiple times for parsing same stream.
+   The *size is updated at return to contain the amount of data actually
+   parsed - the rest of the data should be passed again to this function. */
+size_t quoted_printable_decode(const unsigned char *src, size_t *size,
+			       unsigned char *dest);
+
+#endif
--- a/src/lib-storage/index/index-search.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib-storage/index/index-search.c	Wed Nov 13 13:08:18 2002 +0200
@@ -7,6 +7,7 @@
 #include "rfc822-tokenize.h"
 #include "rfc822-date.h"
 #include "message-size.h"
+#include "message-body-search.h"
 #include "message-header-search.h"
 #include "imap-date.h"
 #include "imap-envelope.h"
@@ -24,6 +25,9 @@
 		(arg)->result = !(arg)->not ? (res) : -(res); \
 	} STMT_END
 
+#define TXT_UNKNOWN_CHARSET "Unknown charset"
+#define TXT_INVALID_SEARCH_KEY "Invalid search key"
+
 typedef struct {
 	Pool hdr_pool;
 	IndexMailbox *ibox;
@@ -44,12 +48,10 @@
 } SearchHeaderContext;
 
 typedef struct {
-	MailSearchArg *args;
-	const char *msg;
-	size_t size;
-
-	size_t max_searchword_len;
-} SearchTextContext;
+        SearchIndexContext *index_ctx;
+	IBuffer *inbuf;
+	MessagePart *part;
+} SearchBodyContext;
 
 static int msgset_contains(const char *set, unsigned int match_num,
 			   unsigned int max_num)
@@ -320,7 +322,7 @@
 						  &unknown_charset);
 	if (arg->context == NULL) {
 		ctx->error = unknown_charset ?
-			"Unknown charset" : "Invalid search key";
+			TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
 	}
 
 	return arg->context;
@@ -519,72 +521,28 @@
 	}
 }
 
-static void search_text(MailSearchArg *arg, SearchTextContext *ctx)
+static void search_body(MailSearchArg *arg, void *context)
 {
-	const char *p;
-	size_t i, len, max;
+	SearchBodyContext *ctx = context;
+	int ret, unknown_charset;
 
-	if (arg->result != 0)
+	if (ctx->index_ctx->error != NULL)
 		return;
 
-	len = strlen(arg->value.str);
-	if (len > ctx->max_searchword_len)
-		ctx->max_searchword_len = len;
-
-	if (ctx->size >= len) {
-		max = ctx->size-len;
-		for (i = 0, p = ctx->msg; i <= max; i++, p++) {
-			if (i_toupper(*p) == arg->value.str[0] &&
-			    strncasecmp(p, arg->value.str, len) == 0) {
-				/* match */
-				ARG_SET_RESULT(arg, 1);
-				return;
-			}
-		}
-	}
-}
-
-static void search_text_body(MailSearchArg *arg, void *context)
-{
-	SearchTextContext *ctx = context;
-
-	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY)
-		search_text(arg, ctx);
-}
+	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY) {
+		i_buffer_seek(ctx->inbuf, 0);
+		ret = message_body_search(arg->value.str,
+					  ctx->index_ctx->charset,
+					  &unknown_charset, ctx->inbuf,
+					  ctx->part);
 
-static void search_arg_match_data(IBuffer *inbuf, MailSearchArg *args,
-				  MailSearchForeachFunc search_func)
-{
-	SearchTextContext ctx;
-	const unsigned char *data;
-	size_t size, max_searchword_len;
-
-	memset(&ctx, 0, sizeof(ctx));
-	ctx.args = args;
-
-	/* first get the max. search keyword length */
-	mail_search_args_foreach(args, search_func, &ctx);
-        max_searchword_len = ctx.max_searchword_len;
+		if (ret < 0) {
+			ctx->index_ctx->error = unknown_charset ?
+				TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
+		}
 
-	/* do this in blocks: read data, compare it for all search words, skip
-	   for block size - (strlen(largest_searchword)-1) and continue. */
-	while (i_buffer_read_data(inbuf, &data, &size,
-				  max_searchword_len-1) > 0) {
-		ctx.msg = (const char *) data;
-		ctx.size = size;
-		mail_search_args_foreach(args, search_func, &ctx);
-		i_buffer_skip(inbuf, size - (max_searchword_len-1));
+		ARG_SET_RESULT(arg, ret > 0);
 	}
-
-	if (size > 0) {
-		/* last block */
-		ctx.msg = (const char *) data;
-		ctx.size = size;
-		mail_search_args_foreach(args, search_func, &ctx);
-		i_buffer_skip(inbuf, size);
-	}
-
-	i_buffer_set_read_limit(inbuf, 0);
 }
 
 static int search_arg_match_text(MailSearchArg *args, SearchIndexContext *ctx)
@@ -606,22 +564,23 @@
 		SearchHeaderContext hdr_ctx;
 
 		memset(&hdr_ctx, 0, sizeof(hdr_ctx));
-
-		/* header checks */
 		hdr_ctx.index_context = ctx;
 		hdr_ctx.custom_header = TRUE;
 		hdr_ctx.args = args;
+
 		message_parse_header(NULL, inbuf, &hdr_size,
 				     search_header, &hdr_ctx);
 	}
 
 	if (have_text || have_body) {
-		if (inbuf->v_offset == 0) {
-			/* skip over headers */
-			i_buffer_skip(inbuf, hdr_size.physical_size);
-		}
+		SearchBodyContext body_ctx;
 
-		search_arg_match_data(inbuf, args, search_text_body);
+		memset(&body_ctx, 0, sizeof(body_ctx));
+		body_ctx.index_ctx = ctx;
+		body_ctx.inbuf = inbuf;
+		body_ctx.part = imap_msgcache_get_parts(search_open_cache(ctx));
+
+		mail_search_args_foreach(args, search_body, &body_ctx);
 	}
 	return TRUE;
 }
--- a/src/lib/base64.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib/base64.c	Wed Nov 13 13:08:18 2002 +0200
@@ -50,7 +50,8 @@
 	char *buffer, *p;
 	int c1, c2, c3;
 
-	buffer = p = t_malloc(size*2 + 5);
+	/* + rounding errors + "==" + '\0' */
+	buffer = p = t_malloc(size/3*4 + 2+2+1);
 	while (size > 0) {
 		c1 = *data++; size--;
 		*p++ = basis_64[c1 >> 2];
@@ -100,15 +101,21 @@
 };
 #define CHAR64(c)  (index_64[(int)(unsigned char)(c)])
 
-ssize_t base64_decode(const char *src, size_t size, unsigned char *dest)
+ssize_t base64_decode(const char *src, size_t *size, unsigned char *dest)
 {
 	unsigned char *p;
+	size_t left;
 	int c1, c2, c3, c4;
 
-	p = dest;
-	while (size >= 4) {
+	p = dest; left = *size;
+	while (left >= 4) {
 		c1 = *src++;
 
+		if (c1 == '\n' || c1 == '\r' || c1 == ' ' || c1 == '\t') {
+			left--;
+			continue;
+		}
+
 		if (CHAR64(c1) == XX)
 			return -1;
 
@@ -124,24 +131,22 @@
 		if (c4 != '=' && CHAR64(c4) == XX)
 			return -1;
 
-		size -= 4;
+		left -= 4;
 
 		*p++ = ((CHAR64(c1) << 2) | ((CHAR64(c2) & 0x30) >> 4));
 
 		if (c3 == '=') {
-			if (size != 0 || c4 != '=')
+			if (c4 != '=')
 				return -1;
 			break;
 		}
 
 		*p++ = (((CHAR64(c2) & 0xf) << 4) | ((CHAR64(c3) & 0x3c) >> 2));
-		if (c4 == '=') {
-			if (size != 0)
-				return -1;
+		if (c4 == '=')
 			break;
-		}
 		*p++ = (((CHAR64(c3) & 0x3) << 6) | CHAR64(c4));
 	}
 
+	*size -= left;
 	return (ssize_t) (p-dest);
 }
--- a/src/lib/base64.h	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/lib/base64.h	Wed Nov 13 13:08:18 2002 +0200
@@ -5,7 +5,17 @@
 const char *base64_encode(const unsigned char *data, size_t size);
 
 /* Translates base64 data into binary. dest must be large enough, and may be
-   same as src. Returns size of the binary data, or -1 if error occured. */
-ssize_t base64_decode(const char *src, size_t size, unsigned char *dest);
+   same as src. Returns size of the binary data, or -1 if error occurred.
+   Any CR, LF characters are ignored, as well as whitespace at beginning or
+   end of line.
+
+   This function may be called multiple times for parsing same base64 stream.
+   The *size is updated at return to contain the amount of data actually
+   parsed - the rest of the data should be passed again to this function. */
+ssize_t base64_decode(const char *src, size_t *size, unsigned char *dest);
+
+/* max. buffer size required for base64_decode(), not including trailing \0 */
+#define MAX_BASE64_DECODED_SIZE(size) \
+	((size) / 4 * 3 + 3)
 
 #endif
--- a/src/login/client-authenticate.c	Wed Nov 13 13:01:11 2002 +0200
+++ b/src/login/client-authenticate.c	Wed Nov 13 13:08:18 2002 +0200
@@ -237,7 +237,7 @@
 {
 	Client *client = context;
 	char *line;
-	ssize_t size;
+	ssize_t size, linelen;
 
 	if (!client_read(client))
 		return;
@@ -251,7 +251,8 @@
 		return;
 	}
 
-	size = base64_decode(line, strlen(line), (unsigned char *) line);
+	linelen = strlen(line);
+	size = base64_decode(line, &linelen, (unsigned char *) line);
 	if (size < 0) {
 		/* failed */
 		client_auth_abort(client, "NO Invalid base64 data");