changeset 568:f2aa58c2afd0 HEAD

SEARCH CHARSET support. Currently we do it through iconv() and only ASCII characters are compared case-insensitively.
author Timo Sirainen <tss@iki.fi>
date Sun, 03 Nov 2002 10:39:43 +0200
parents a59f68334ad6
children cafe57140f5c
files configure.in src/Makefile.am src/imap/Makefile.am src/imap/cmd-search.c src/lib-charset/.cvsignore src/lib-charset/Makefile.am src/lib-charset/charset-ascii.c src/lib-charset/charset-iconv.c src/lib-charset/charset-utf8.h src/lib-imap/imap-message-cache.c src/lib-mail/Makefile.am src/lib-mail/message-header-search.c src/lib-mail/message-header-search.h src/lib-storage/index/index-search.c src/lib-storage/index/index-storage.h src/lib-storage/mail-search.c src/lib-storage/mail-search.h src/lib-storage/mail-storage.h
diffstat 18 files changed, 525 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Sun Nov 03 10:16:41 2002 +0200
+++ b/configure.in	Sun Nov 03 10:39:43 2002 +0200
@@ -14,7 +14,7 @@
 AM_PROG_LIBTOOL
 
 AC_CHECK_HEADERS(string.h stdlib.h unistd.h dirent.h)
-AC_CHECK_HEADERS(sys/uio.h linux/mman.h)
+AC_CHECK_HEADERS(sys/uio.h iconv.h linux/mman.h)
 
 # check posix headers
 AC_CHECK_HEADERS(sys/time.h)
@@ -510,6 +510,7 @@
 doc/Makefile
 src/Makefile
 src/lib/Makefile
+src/lib-charset/Makefile
 src/lib-imap/Makefile
 src/lib-index/Makefile
 src/lib-index/maildir/Makefile
--- a/src/Makefile.am	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/Makefile.am	Sun Nov 03 10:39:43 2002 +0200
@@ -1,1 +1,1 @@
-SUBDIRS = lib lib-mail lib-imap lib-index lib-storage auth master login imap
+SUBDIRS = lib lib-charset lib-mail lib-imap lib-index lib-storage auth master login imap
--- a/src/imap/Makefile.am	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/imap/Makefile.am	Sun Nov 03 10:39:43 2002 +0200
@@ -17,6 +17,7 @@
 	../lib-storage/subscription-file/libstorage_subscription_file.a \
 	../lib-imap/libimap.a \
 	../lib-mail/libmail.a \
+	../lib-charset/libcharset.a \
 	../lib/liblib.a
 
 cmds = \
--- a/src/imap/cmd-search.c	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/imap/cmd-search.c	Sun Nov 03 10:39:43 2002 +0200
@@ -10,7 +10,7 @@
 	ImapArg *args;
 	int args_count;
 	Pool pool;
-	const char *error;
+	const char *error, *charset;
 
 	args_count = imap_parser_read_args(client->parser, 0, 0, &args);
 	if (args_count == -2)
@@ -25,6 +25,23 @@
 	if (!client_verify_open_mailbox(client))
 		return TRUE;
 
+	if (args->type == IMAP_ARG_ATOM &&
+	    strcasecmp(args->data.str, "CHARSET") == 0) {
+		/* CHARSET specified */
+		args++;
+		if (args->type != IMAP_ARG_ATOM &&
+		    args->type != IMAP_ARG_STRING) {
+			client_send_command_error(client,
+						  "Invalid charset argument.");
+			return TRUE;
+		}
+
+		charset = args->data.str;
+		args++;
+	} else {
+		charset = NULL;
+	}
+
 	pool = pool_create("MailSearchArgs", 2048, FALSE);
 
 	sargs = mail_search_args_build(pool, args, &error);
@@ -32,7 +49,7 @@
 		/* error in search arguments */
 		client_send_tagline(client, t_strconcat("NO ", error, NULL));
 	} else {
-		if (client->mailbox->search(client->mailbox, sargs,
+		if (client->mailbox->search(client->mailbox, charset, sargs,
 					    client->outbuf, client->cmd_uid)) {
 			/* NOTE: syncing isn't allowed here */
 			client_sync_without_expunges(client);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-charset/.cvsignore	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,8 @@
+*.la
+*.lo
+*.o
+.deps
+.libs
+Makefile
+Makefile.in
+so_locations
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-charset/Makefile.am	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,11 @@
+noinst_LIBRARIES = libcharset.a
+
+INCLUDES = \
+	-I$(top_srcdir)/src/lib
+
+libcharset_a_SOURCES = \
+	charset-ascii.c \
+	charset-iconv.c
+
+noinst_HEADERS = \
+	charset-utf8.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-charset/charset-ascii.c	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,18 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+
+#ifndef HAVE_ICONV_H
+
+const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
+				  const char *charset, int *unknown_charset)
+{
+	/* data is *size bytes long and may lack a NUL, so copy it by length */
+	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
+		return str_ucase((char *) t_strdup_until(data, data + *size));
+	if (unknown_charset != NULL)
+		*unknown_charset = TRUE;
+	return NULL;
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-charset/charset-iconv.c	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,62 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+
+#ifdef HAVE_ICONV_H
+
+#include <iconv.h>
+
+const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
+				  const char *charset, int *unknown_charset)
+{
+	iconv_t cd;
+	char *inbuf, *outbuf, *outpos;
+	size_t inleft, outleft, outsize, pos;
+
+	/* data is *size bytes long and may lack a NUL, so copy it by length */
+	if (charset == NULL || strcasecmp(charset, "us-ascii") == 0)
+		return str_ucase((char *) t_strdup_until(data, data + *size));
+
+	/* "UTF-8" is the registered name, "UTF8" isn't known by every iconv */
+	cd = iconv_open("UTF-8", charset);
+	if (cd == (iconv_t)-1) {
+		if (unknown_charset != NULL)
+			*unknown_charset = TRUE;
+		return NULL;
+	}
+	if (unknown_charset != NULL)
+		*unknown_charset = FALSE;
+
+	inbuf = (char *) data;
+	inleft = *size;
+	outsize = outleft = *size * 2;
+	outbuf = outpos = t_buffer_get(outsize + 1);
+
+	while (iconv(cd, &inbuf, &inleft, &outpos, &outleft) == (size_t)-1) {
+		if (errno != E2BIG) {
+			/* invalid data */
+			iconv_close(cd);
+			return NULL;
+		}
+
+		/* output buffer too small, grow it */
+		pos = outsize - outleft;
+		outsize *= 2;
+		outleft = outsize - pos;
+
+		outbuf = t_buffer_reget(outbuf, outsize + 1);
+		outpos = outbuf + pos;
+	}
+
+	*size = (size_t) (outpos - outbuf);
+	*outpos++ = '\0';
+	t_buffer_alloc(*size + 1);
+
+	/* FIXME: this works only for ASCII */
+	str_ucase(outbuf);
+
+	iconv_close(cd);
+	return outbuf;
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-charset/charset-utf8.h	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,7 @@
+#ifndef __CHARSET_UTF8_H
+#define __CHARSET_UTF8_H
+
+const char *charset_to_ucase_utf8(const unsigned char *data, size_t *size,
+				  const char *charset, int *unknown_charset);
+
+#endif
--- a/src/lib-imap/imap-message-cache.c	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-imap/imap-message-cache.c	Sun Nov 03 10:39:43 2002 +0200
@@ -477,6 +477,8 @@
 
 	msg = cache->open_msg;
 	if (inbuf != NULL) {
+		if (msg->hdr_size == NULL)
+			cache_fields(cache, IMAP_CACHE_MESSAGE_HDR_SIZE);
 		offset = hdr_size != NULL ? 0 :
 			msg->hdr_size->physical_size;
 		if (!imap_msgcache_get_inbuf(cache, offset))
--- a/src/lib-mail/Makefile.am	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-mail/Makefile.am	Sun Nov 03 10:39:43 2002 +0200
@@ -1,10 +1,12 @@
 noinst_LIBRARIES = libmail.a
 
 INCLUDES = \
-	-I$(top_srcdir)/src/lib
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-charset
 
 libmail_a_SOURCES = \
 	message-content-parser.c \
+	message-header-search.c \
 	message-parser.c \
 	message-part-serialize.c \
 	message-send.c \
@@ -15,6 +17,7 @@
 
 noinst_HEADERS = \
 	message-content-parser.h \
+	message-header-search.h \
 	message-parser.h \
 	message-part-serialize.h \
 	message-send.h \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-search.c	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,279 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "lib.h"
+#include "base64.h"
+#include "hex-binary.h"
+#include "charset-utf8.h"
+#include "rfc822-tokenize.h"
+#include "message-header-search.h"
+
+#include <ctype.h>
+
+struct _HeaderSearchContext {
+	const unsigned char *key; /* uppercased search key, owned by the pool */
+	size_t key_len; /* length of key in bytes */
+
+	size_t *matches; /* key_len slots: next key offset of a partial match */
+	ssize_t match_count; /* number of slots in use in matches[] */
+
+	unsigned int last_newline:1; /* previous block ended with '\n' */
+	unsigned int submatch:1; /* set while searching decoded RFC2047 text */
+	unsigned int eoh:1; /* end of headers was seen, input is ignored */
+	unsigned int unknown_charset:1; /* search was started with NULL charset */
+};
+
+HeaderSearchContext *
+message_header_search_init(Pool pool, const char *key, const char *charset,
+			   int *unknown_charset)
+{
+	HeaderSearchContext *ctx;
+	const char *ucase_key;
+	size_t size;
+
+	/* convert the key first, so a rejected key allocates nothing from pool */
+	size = strlen(key);
+	ucase_key = charset_to_ucase_utf8((const unsigned char *) key, &size,
+					  charset, unknown_charset);
+	if (ucase_key == NULL)
+		return NULL;
+	i_assert(size <= SSIZE_T_MAX);
+
+	ctx = p_new(pool, HeaderSearchContext, 1);
+	ctx->key = (const unsigned char *) p_strdup(pool, ucase_key);
+	ctx->key_len = size;
+	/* no charset given: header data is matched without conversion */
+	ctx->unknown_charset = charset == NULL;
+	ctx->matches = p_malloc(pool, sizeof(size_t) * ctx->key_len);
+	return ctx;
+}
+
+static size_t quoted_printable_decode(const unsigned char *src, size_t size,
+				      unsigned char *dest)
+{
+	/* RFC2047 "Q" decoding: '_' is a space, "=XX" is a hex-encoded byte.
+	   Writes at most size bytes to dest and returns the decoded length. */
+	unsigned char *out = dest;
+	char hex[3];
+	size_t i;
+
+	hex[2] = '\0';
+	for (i = 0; i < size; i++) {
+		switch (src[i]) {
+		case '_':
+			*out++ = ' ';
+			continue;
+		case '=':
+			if (size - i <= 2)
+				break; /* too short for "=XX", copy as is */
+			hex[0] = src[i+1];
+			hex[1] = src[i+2];
+			if (hex_to_binary(hex, out) == 1) {
+				out++;
+				i += 2;
+				continue;
+			}
+			break; /* hex_to_binary() rejected it, copy '=' as is */
+		default:
+			break;
+		}
+		*out++ = src[i];
+	}
+
+	return (size_t) (out - dest);
+}
+
+static int match_data(const unsigned char *data, size_t size,
+		      const char *charset, HeaderSearchContext *ctx)
+{
+	const char *converted;
+	int found;
+
+	/* the search key's charset is unknown, so the text can't be brought
+	   to a common charset with it: match without any conversion */
+	if (ctx->unknown_charset)
+		charset = NULL;
+
+	converted = charset_to_ucase_utf8(data, &size, charset, NULL);
+	if (converted == NULL)
+		return FALSE; /* unknown character set, or invalid data */
+
+	/* submatch makes the nested search handle the decoded text as plain
+	   data: no header line handling and no further encoded words */
+	ctx->submatch = TRUE;
+	found = message_header_search((const unsigned char *) converted,
+				      &size, ctx);
+	ctx->submatch = FALSE;
+
+	return found;
+}
+
+static int match_encoded(const unsigned char **start, const unsigned char *end,
+			 HeaderSearchContext *ctx)
+{
+	/* Parse and match one RFC2047 encoded word; *start points just after
+	   its "=?". For a parsed word *start is moved to the word's last byte.
+	   Returns TRUE if the decoded text completed a match of the key. */
+	const unsigned char *p, *encoding, *text, *new_end;
+	const char *charset;
+	unsigned char *buf;
+	ssize_t size;
+	int ok, ret;
+
+	/* first split the string =?charset?encoding?text?= */
+	ok = FALSE;
+	charset = (const char *) *start; encoding = NULL; text = NULL;
+	for (p = *start; p != end; p++) {
+		if (*p == '?') {
+			if (encoding == NULL) {
+				charset = t_strdup_until(charset, p);
+				encoding = p+1;
+			} else if (text == NULL) {
+				if (p != encoding+1)
+					encoding = "?";
+				else if (*encoding == 'Q' || *encoding == 'q')
+					encoding = "Q";
+				else if (*encoding == 'B' || *encoding == 'b')
+					encoding = "B";
+				else
+					encoding = "?";
+				text = p+1;
+			} else {
+				new_end = p;
+				p++;
+				if (p != end && *p == '=')
+					p++;
+				end = new_end;
+				*start = p-1;
+				ok = TRUE;
+				break;
+			}
+		}
+	}
+
+	if (ok && *encoding != '?') {
+		t_push();
+		/* buffer as large as the encoded text, plus room for a NUL */
+		size = (ssize_t) (end - text);
+		buf = t_malloc(size + 1);
+		if (*encoding == 'Q')
+			size = quoted_printable_decode(text, size, buf);
+		else
+			size = base64_decode(text, size, buf);
+
+		if (size >= 0) {
+			/* non-corrupted encoding. NUL-terminate the result
+			   so it's also safe to handle as a C string */
+			buf[size] = '\0';
+			ret = match_data(buf, size, charset, ctx);
+			t_pop();
+			return ret;
+		}
+		t_pop();
+	}
+
+	/* non-supported encoding, we can't match it */
+	ctx->match_count = 0;
+	return FALSE;
+}
+
+int message_header_search(const unsigned char *header_block,
+			  size_t *header_size, HeaderSearchContext *ctx)
+{
+	/* Scans one block of header data for ctx->key. Returns TRUE on a full
+	   match. *header_size is set to the number of bytes consumed, which
+	   can be less than given after a match or at the end of headers. */
+	const unsigned char *p, *end;
+	unsigned char chr;
+	ssize_t i;
+	int found;
+
+	if (ctx->eoh || *header_size == 0)
+		return FALSE;
+
+	end = header_block + *header_size;
+	found = FALSE;
+	for (p = header_block; p != end; p++) {
+		if (p[0] == '=' && p+1 != end && p[1] == '?' &&
+		    !ctx->submatch) {
+			/* encoded string. read it. */
+			p += 2;
+			if (match_encoded(&p, end, ctx)) {
+				found = TRUE;
+				break;
+			}
+			/* p wasn't moved if the word had no end, and it's
+			   already at end if the block ended with "=?" */
+			if (p == end)
+				break;
+			continue;
+		}
+
+		chr = ctx->submatch || (*p & 0x80) != 0 ? *p : i_toupper(*p);
+
+		if (((p == header_block && ctx->last_newline) ||
+		     (p != header_block && p[-1] == '\n')) && !ctx->submatch) {
+			/* newline */
+			if (!IS_LWSP(*p)) {
+				/* not a long header, reset matches */
+				ctx->match_count = 0;
+				/* and see if we're at end of header */
+				if (*p == '\n') {
+					p++;
+					ctx->eoh = TRUE;
+					break;
+				}
+				if (*p == '\r' && p+1 != end && p[1] == '\n') {
+					p += 2;
+					ctx->eoh = TRUE;
+					break;
+				}
+			}
+			chr = ' ';
+		}
+
+		if (*p == '\r' || *p == '\n')
+			continue;
+
+		for (i = ctx->match_count-1; i >= 0; i--) {
+			if (ctx->key[ctx->matches[i]] == chr) {
+				if (++ctx->matches[i] == ctx->key_len) {
+					/* full match */
+					p++;
+					found = TRUE;
+					break;
+				}
+			} else {
+				/* non-match, drop it (size is in bytes) */
+				ctx->match_count--;
+				if (i != ctx->match_count)
+					memmove(ctx->matches + i,
+						ctx->matches + i + 1,
+						sizeof(*ctx->matches) *
+						(ctx->match_count - i));
+			}
+		}
+		if (found)
+			break;
+
+		if (chr == ctx->key[0]) {
+			if (ctx->key_len == 1) {
+				/* only one character in search key */
+				p++;
+				found = TRUE;
+				break;
+			}
+			i_assert((size_t)ctx->match_count < ctx->key_len);
+			ctx->matches[ctx->match_count++] = 1;
+		}
+	}
+
+	*header_size = (size_t) (p - header_block);
+	ctx->last_newline = end[-1] == '\n';
+	return found;
+}
+
+void message_header_search_reset(HeaderSearchContext *ctx)
+{
+	ctx->match_count = 0; /* drop partial matches of the previous header */
+	ctx->eoh = FALSE; /* and scan again even after an end of headers */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/message-header-search.h	Sun Nov 03 10:39:43 2002 +0200
@@ -0,0 +1,23 @@
+#ifndef __MESSAGE_HEADER_SEARCH_H
+#define __MESSAGE_HEADER_SEARCH_H
+
+typedef struct _HeaderSearchContext HeaderSearchContext;
+
+/* Initialize new search. Allocates memory from given pool. Returns NULL
+   if charset is unknown or key is not valid in specified charset. */
+HeaderSearchContext *
+message_header_search_init(Pool pool, const char *key, const char *charset,
+			   int *unknown_charset);
+
+/* Returns TRUE if key is found from header. This function may be called
+   multiple times with partial header blocks, but the blocks must contain only
+   full lines so RFC2047 parsing can be done. *header_size is updated to
+   contain the number of bytes we processed (can be less than was given,
+   if we got a match or reached the end of headers). */
+int message_header_search(const unsigned char *header_block,
+			  size_t *header_size, HeaderSearchContext *ctx);
+
+/* Next call to message_header_search() will begin a new header. */
+void message_header_search_reset(HeaderSearchContext *ctx);
+
+#endif
--- a/src/lib-storage/index/index-search.c	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-storage/index/index-search.c	Sun Nov 03 10:39:43 2002 +0200
@@ -7,6 +7,7 @@
 #include "rfc822-tokenize.h"
 #include "rfc822-date.h"
 #include "message-size.h"
+#include "message-header-search.h"
 #include "imap-date.h"
 #include "imap-envelope.h"
 #include "index-storage.h"
@@ -24,13 +25,17 @@
 	} STMT_END
 
 typedef struct {
+	Pool hdr_pool;
 	IndexMailbox *ibox;
 	MailIndexRecord *rec;
 	unsigned int client_seq;
 	int cached;
+	const char *charset;
+	const char *error;
 } SearchIndexContext;
 
 typedef struct {
+        SearchIndexContext *index_context;
 	MailSearchArg *args;
 	int custom_header;
 
@@ -295,30 +300,44 @@
 	}
 }
 
-static int search_substr(const char *haystack, const char *needle)
+static HeaderSearchContext *search_header_context(SearchIndexContext *ctx,
+						  MailSearchArg *arg)
 {
-	size_t i, needle_len;
+	int unknown_charset;
 
-	/* note: needle is already uppercased */
-	needle_len = strlen(needle);
-	for (i = 0; haystack[i] != '\0'; i++) {
-		if (needle[0] == i_toupper(haystack[i]) &&
-		    strncasecmp(needle, haystack+i, needle_len) == 0)
-			return 1;
+	if (arg->context != NULL) {
+                message_header_search_reset(arg->context);
+		return arg->context;
 	}
 
-	return 0;
+	if (ctx->hdr_pool == NULL) {
+		ctx->hdr_pool = pool_create("message_header_search",
+					    8192, FALSE);
+	}
+
+	arg->context = message_header_search_init(ctx->hdr_pool, arg->value.str,
+						  ctx->charset,
+						  &unknown_charset);
+	if (arg->context == NULL) {
+		ctx->error = unknown_charset ?
+			"Unknown charset" : "Invalid search key";
+	}
+
+	return arg->context;
 }
 
 /* Returns >0 = matched, 0 = not matched, -1 = unknown */
-static int search_arg_match_envelope(MailIndex *index, MailIndexRecord *rec,
-				     MailSearchArgType type, const char *value)
+static int search_arg_match_envelope(SearchIndexContext *ctx,
+				     MailSearchArg *arg)
 {
-        ImapEnvelopeField env_field;
+	MailIndex *index = ctx->ibox->index;
+	ImapEnvelopeField env_field;
+        HeaderSearchContext *hdr_search_ctx;
 	const char *envelope, *field;
+	size_t size;
 	int ret;
 
-	switch (type) {
+	switch (arg->type) {
 	case SEARCH_SENTBEFORE:
 	case SEARCH_SENTON:
 	case SEARCH_SENTSINCE:
@@ -354,7 +373,7 @@
 	t_push();
 
 	/* get field from hopefully cached envelope */
-	envelope = index->lookup_field(index, rec, DATA_FIELD_ENVELOPE);
+	envelope = index->lookup_field(index, ctx->rec, DATA_FIELD_ENVELOPE);
 	if (envelope != NULL)
 		field = imap_envelope_parse(envelope, env_field);
 	else {
@@ -365,13 +384,21 @@
 	if (field == NULL)
 		ret = -1;
 	else {
-		switch (type) {
+		switch (arg->type) {
 		case SEARCH_SENTBEFORE:
 		case SEARCH_SENTON:
 		case SEARCH_SENTSINCE:
-			ret = search_sent(type, value, field);
+			ret = search_sent(arg->type, arg->value.str, field);
 		default:
-			ret = search_substr(field, value);
+			hdr_search_ctx = search_header_context(ctx, arg);
+			if (hdr_search_ctx == NULL) {
+				ret = 0;
+				break;
+			}
+
+			size = strlen(field);
+			ret = message_header_search(field, &size,
+						    hdr_search_ctx) ? 1 : 0;
 		}
 	}
 	t_pop();
@@ -382,8 +409,7 @@
 {
 	SearchIndexContext *ctx = context;
 
-	switch (search_arg_match_envelope(ctx->ibox->index, ctx->rec,
-					  arg->type, arg->value.str)) {
+	switch (search_arg_match_envelope(ctx, arg)) {
 	case -1:
 		/* unknown */
 		break;
@@ -396,52 +422,10 @@
 	}
 }
 
-/* needle must be uppercased */
-static int header_value_match(const char *haystack, size_t haystack_len,
-			      const char *needle)
-{
-	const char *n;
-	size_t i, j, needle_len, max;
-
-	if (*needle == '\0')
-		return TRUE;
-
-	needle_len = strlen(needle);
-	if (haystack_len < needle_len)
-		return FALSE;
-
-	max = haystack_len - needle_len;
-	for (i = 0; i <= max; i++) {
-		if (needle[0] != i_toupper(haystack[i]))
-			continue;
-
-		for (j = i, n = needle; j < haystack_len; j++) {
-			if (haystack[j] == '\r') {
-				if (j+1 != haystack_len)
-					j++;
-			}
-
-			if (haystack[j] == '\n' && j+1 < haystack_len &&
-			    IS_LWSP(haystack[j+1])) {
-				/* long header continuation */
-				j++;
-			}
-
-			if (*n++ != i_toupper(haystack[j]))
-				break;
-
-			if (*n == '\0')
-				return 1;
-		}
-	}
-
-	return -1;
-}
-
 static void search_header_arg(MailSearchArg *arg, void *context)
 {
 	SearchHeaderContext *ctx = context;
-	const char *value;
+        HeaderSearchContext *hdr_search_ctx;
 	size_t len;
 	int ret;
 
@@ -462,47 +446,48 @@
 		if (ctx->name_len != 4 ||
 		    strncasecmp(ctx->name, "From", 4) != 0)
 			return;
-		value = arg->value.str;
 		break;
 	case SEARCH_TO:
 		if (ctx->name_len != 2 ||
 		    strncasecmp(ctx->name, "To", 2) != 0)
 			return;
-		value = arg->value.str;
 		break;
 	case SEARCH_CC:
 		if (ctx->name_len != 2 ||
 		    strncasecmp(ctx->name, "Cc", 2) != 0)
 			return;
-		value = arg->value.str;
 		break;
 	case SEARCH_BCC:
 		if (ctx->name_len != 3 ||
 		    strncasecmp(ctx->name, "Bcc", 3) != 0)
 			return;
-		value = arg->value.str;
 		break;
 	case SEARCH_SUBJECT:
 		if (ctx->name_len != 7 ||
 		    strncasecmp(ctx->name, "Subject", 7) != 0)
 			return;
-		value = arg->value.str;
 		break;
 	case SEARCH_HEADER:
 		ctx->custom_header = TRUE;
 
-		len = strlen(arg->value.str);
+		len = strlen(arg->hdr_field_name);
 		if (ctx->name_len != len ||
-		    strncasecmp(ctx->name, arg->value.str, len) != 0)
+		    strncasecmp(ctx->name, arg->hdr_field_name, len) != 0)
 			return;
-
-		value = arg->hdr_value;
 	default:
 		return;
 	}
 
 	/* then check if the value matches */
-	ret = header_value_match(ctx->value, ctx->value_len, value);
+	hdr_search_ctx = search_header_context(ctx->index_context, arg);
+	if (hdr_search_ctx == NULL)
+		ret = 0;
+	else {
+		len = ctx->value_len;
+		ret = message_header_search(ctx->value, &len,
+					    hdr_search_ctx) ? 1 : 0;
+	}
+
         ARG_SET_RESULT(arg, ret);
 }
 
@@ -623,15 +608,16 @@
 		return FALSE;
 
 	if (have_headers) {
-		SearchHeaderContext ctx;
+		SearchHeaderContext hdr_ctx;
 
-		memset(&ctx, 0, sizeof(ctx));
+		memset(&hdr_ctx, 0, sizeof(hdr_ctx));
 
 		/* header checks */
-		ctx.custom_header = TRUE;
-		ctx.args = args;
+		hdr_ctx.index_context = ctx;
+		hdr_ctx.custom_header = TRUE;
+		hdr_ctx.args = args;
 		message_parse_header(NULL, inbuf, &hdr_size,
-				     search_header, &ctx);
+				     search_header, &hdr_ctx);
 	}
 
 	if (have_text) {
@@ -822,8 +808,8 @@
 	return TRUE;
 }
 
-static int search_messages(IndexMailbox *ibox, MailSearchArg *args,
-			   OBuffer *outbuf, int uid_result)
+static int search_messages(IndexMailbox *ibox, const char *charset,
+			   MailSearchArg *args, OBuffer *outbuf, int uid_result)
 {
 	SearchIndexContext ctx;
 	MailIndexRecord *rec;
@@ -850,7 +836,10 @@
 						   &expunges_before);
 	client_seq += expunges_before;
 
+	memset(&ctx, 0, sizeof(ctx));
 	ctx.ibox = ibox;
+	ctx.charset = charset;
+
 	for (; rec != NULL && rec->uid <= last_uid; client_seq++) {
 		while (expunges->uid1 != 0 && expunges->uid1 < rec->uid) {
 			i_assert(expunges->uid2 < rec->uid);
@@ -875,6 +864,9 @@
                 imap_msgcache_close(ibox->cache);
 		t_pop();
 
+		if (ctx.error != NULL)
+			break;
+
 		if (!failed) {
 			found = TRUE;
 			for (arg = args; arg != NULL; arg = arg->next) {
@@ -894,10 +886,15 @@
 		rec = ibox->index->next(ibox->index, rec);
 	}
 
-	return TRUE;
+	if (ctx.hdr_pool != NULL)
+		pool_unref(ctx.hdr_pool);
+
+	if (ctx.error != NULL)
+		mail_storage_set_error(ibox->box.storage, "%s", ctx.error);
+	return ctx.error == NULL;
 }
 
-int index_storage_search(Mailbox *box, MailSearchArg *args,
+int index_storage_search(Mailbox *box, const char *charset, MailSearchArg *args,
 			 OBuffer *outbuf, int uid_result)
 {
 	IndexMailbox *ibox = (IndexMailbox *) box;
@@ -907,7 +904,7 @@
 		return FALSE;
 
 	o_buffer_send(outbuf, "* SEARCH", 8);
-	failed = !search_messages(ibox, args, outbuf, uid_result);
+	failed = !search_messages(ibox, charset, args, outbuf, uid_result);
 	o_buffer_send(outbuf, "\r\n", 2);
 
 	if (!ibox->index->set_lock(ibox->index, MAIL_LOCK_UNLOCK))
--- a/src/lib-storage/index/index-storage.h	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-storage/index/index-storage.h	Sun Nov 03 10:39:43 2002 +0200
@@ -73,7 +73,7 @@
 			       int *all_found);
 int index_storage_fetch(Mailbox *box, MailFetchData *fetch_data,
 			OBuffer *outbuf, int *all_found);
-int index_storage_search(Mailbox *box, MailSearchArg *args,
+int index_storage_search(Mailbox *box, const char *charset, MailSearchArg *args,
 			 OBuffer *outbuf, int uid_result);
 
 #endif
--- a/src/lib-storage/mail-search.c	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-storage/mail-search.c	Sun Nov 03 10:39:43 2002 +0200
@@ -46,7 +46,8 @@
 			return FALSE;
 		}
 
-		sarg->hdr_value = str_ucase((*args)->data.str);
+                sarg->hdr_field_name = sarg->value.str;
+		sarg->value.str = str_ucase((*args)->data.str);
 		*args += 1;
 	}
 
--- a/src/lib-storage/mail-search.h	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-storage/mail-search.h	Sun Nov 03 10:39:43 2002 +0200
@@ -60,7 +60,8 @@
 		const char *str;
 	} value;
 
-	const char *hdr_value; /* for SEARCH_HEADER */
+        void *context;
+	const char *hdr_field_name; /* for SEARCH_HEADER */
 	unsigned int not:1;
 
 	int result;
--- a/src/lib-storage/mail-storage.h	Sun Nov 03 10:16:41 2002 +0200
+++ b/src/lib-storage/mail-storage.h	Sun Nov 03 10:39:43 2002 +0200
@@ -155,8 +155,10 @@
 		     OBuffer *outbuf, int *all_found);
 
 	/* Search wanted mail data. args contains the search criteria.
-	   results are written into outbuf in RFC2060 SEARCH format. */
-	int (*search)(Mailbox *box, MailSearchArg *args,
+	   Results are written into outbuf in RFC2060 SEARCH format.
+	   If charset is NULL, the given search strings are matched without
+	   any conversion. */
+	int (*search)(Mailbox *box, const char *charset, MailSearchArg *args,
 		      OBuffer *outbuf, int uid_result);
 
 	/* Save a new mail into mailbox. timezone_offset specifies the