changeset 3039:d6910d273852 HEAD

Added rfc822 parser which will probably replace message_tokenizer at some point. Rewrote address parsing with the new parser. This fixes various bugs in it.
author Timo Sirainen <tss@iki.fi>
date Thu, 06 Jan 2005 20:14:28 +0200
parents 2f7ffdcd7d67
children 50acbcc7e4d8
files src/imap/imap-sort.c src/lib-imap/imap-envelope.c src/lib-mail/Makefile.am src/lib-mail/message-address.c src/lib-mail/message-address.h src/lib-mail/rfc822-parser.c src/lib-mail/rfc822-parser.h src/lib-storage/index/index-search.c
diffstat 8 files changed, 574 insertions(+), 215 deletions(-) [+]
line wrap: on
line diff
--- a/src/imap/imap-sort.c	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/imap/imap-sort.c	Thu Jan 06 20:14:28 2005 +0200
@@ -295,8 +295,8 @@
 		return NULL;
 
 	addr = message_address_parse(pool_datastack_create(),
-				     (const unsigned char *) str,
-				     (size_t)-1, 1);
+				     (const unsigned char *)str,
+				     strlen(str), 1);
 	return addr != NULL ? addr->mailbox : NULL;
 }
 
--- a/src/lib-imap/imap-envelope.c	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/lib-imap/imap-envelope.c	Thu Jan 06 20:14:28 2005 +0200
@@ -145,7 +145,8 @@
 
 	if (addr_p != NULL) {
 		*addr_p = message_address_parse(pool, hdr->full_value,
-						hdr->full_value_len, 0);
+						hdr->full_value_len,
+						(unsigned int)-1);
 	}
 
 	if (str_p != NULL)
--- a/src/lib-mail/Makefile.am	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/lib-mail/Makefile.am	Thu Jan 06 20:14:28 2005 +0200
@@ -17,7 +17,8 @@
 	message-send.c \
 	message-size.c \
 	message-tokenize.c \
-	quoted-printable.c
+	quoted-printable.c \
+	rfc822-parser.c
 
 noinst_HEADERS = \
 	istream-header-filter.h \
@@ -33,4 +34,5 @@
 	message-send.h \
 	message-size.h \
 	message-tokenize.h \
-	quoted-printable.h
+	quoted-printable.h \
+	rfc822-parser.h
--- a/src/lib-mail/message-address.c	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/lib-mail/message-address.c	Thu Jan 06 20:14:28 2005 +0200
@@ -1,223 +1,284 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2005 Timo Sirainen */
 
 #include "lib.h"
 #include "str.h"
-#include "message-tokenize.h"
+#include "message-parser.h"
 #include "message-address.h"
+#include "rfc822-parser.h"
 
-static struct message_address *
-new_address(pool_t pool, struct message_address ***next_addr)
+/* State shared by the address parsing functions in this file. */
+struct message_address_parser_context {
+	/* pool used for the resulting addresses and their strings */
+	pool_t pool;
+	/* input cursor and the most recently skipped comment */
+	struct rfc822_parser_context parser;
+
+	/* head and tail of the result list built by add_address() */
+	struct message_address *first_addr;
+	struct message_address *last_addr;
+	/* address currently being filled in; add_address() copies it out
+	   and clears it */
+	struct message_address addr;
+	/* scratch buffer reused for each parsed token */
+	string_t *str;
+};
+
+/* Append a copy of the address being built (ctx->addr) to the result list
+   and reset ctx->addr to all zeros for the next address. */
+static void add_address(struct message_address_parser_context *ctx)
 {
 	struct message_address *addr;
 
-	addr = p_new(pool, struct message_address, 1);
+	addr = p_new(ctx->pool, struct message_address, 1);
+
+	/* the string pointers are copied as-is, they were already
+	   allocated from ctx->pool by the parse functions */
+	memcpy(addr, &ctx->addr, sizeof(ctx->addr));
+	memset(&ctx->addr, 0, sizeof(ctx->addr));
+
+	/* link to the tail of the list */
+	if (ctx->first_addr == NULL)
+		ctx->first_addr = addr;
+	else
+		ctx->last_addr->next = addr;
+	ctx->last_addr = addr;
+}
+
+/* Parse the local-part into ctx->addr.mailbox. Only the dot-atom form is
+   handled here. Returns 0 if the input has ended, -1 if the dot-atom parser
+   reported an error, otherwise the dot-atom parser's return value. */
+static int parse_local_part(struct message_address_parser_context *ctx)
+{
+	int result;
+
+	/*
+	   local-part      = dot-atom / quoted-string / obs-local-part
+	   obs-local-part  = word *("." word)
+	*/
+	if (ctx->parser.data == ctx->parser.end) {
+		/* nothing left to parse */
+		return 0;
+	}
+
+	str_truncate(ctx->str, 0);
+	result = rfc822_parse_dot_atom(&ctx->parser, ctx->str);
+	if (result < 0)
+		return -1;
+
+	ctx->addr.mailbox = p_strdup(ctx->pool, str_c(ctx->str));
+	return result;
+}
+
+/* Parse "@domain" at the current position into ctx->addr.domain.
+   rfc822_parse_domain() asserts that the cursor is on '@'. Returns -1 on
+   error, otherwise rfc822_parse_domain()'s return value. */
+static int parse_domain(struct message_address_parser_context *ctx)
+{
+	int result;
+
+	str_truncate(ctx->str, 0);
+	result = rfc822_parse_domain(&ctx->parser, ctx->str);
+	if (result < 0)
+		return -1;
+
+	ctx->addr.domain = p_strdup(ctx->pool, str_c(ctx->str));
+	return result;
+}
+
+/* Parse a source route ("@a,@b") into ctx->addr.route, joining the domains
+   with ','. Returns 1 once a character other than '@' is reached, 0 if the
+   input ends first (the route is not stored in that case), or the non-positive
+   return value of rfc822_parse_domain(). */
+static int parse_domain_list(struct message_address_parser_context *ctx)
+{
+	int result;
+
+	/* obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) */
+	str_truncate(ctx->str, 0);
+	while (ctx->parser.data != ctx->parser.end) {
+		if (*ctx->parser.data != '@') {
+			/* end of the route */
+			ctx->addr.route = p_strdup(ctx->pool, str_c(ctx->str));
+			return 1;
+		}
+
+		if (str_len(ctx->str) > 0)
+			str_append_c(ctx->str, ',');
+		str_append_c(ctx->str, '@');
+
+		result = rfc822_parse_domain(&ctx->parser, ctx->str);
+		if (result <= 0)
+			return result;
+
+		/* skip any number of whitespace / ',' separators */
+		for (;;) {
+			if (!rfc822_skip_lwsp(&ctx->parser) ||
+			    *ctx->parser.data != ',')
+				break;
+			ctx->parser.data++;
+		}
+	}
+	return 0;
+}
+
+/* Parse "<...>" into ctx->addr (route, mailbox, domain). The cursor must be
+   on '<' when called. Returns -1 on a syntax error (invalid route or missing
+   '>'), otherwise the non-positive result of the step that stopped the
+   parsing, or rfc822_skip_lwsp()'s result after the closing '>'. */
+static int parse_angle_addr(struct message_address_parser_context *ctx)
+{
+	int ret;
+
+	/* "<" [ "@" route ":" ] local-part "@" domain ">" */
+	i_assert(*ctx->parser.data == '<');
+	ctx->parser.data++;
+
+	if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0)
+		return ret;
+
+	if (*ctx->parser.data == '@') {
+		/* source route; it must be terminated with ':' */
+		if (parse_domain_list(ctx) <= 0 || *ctx->parser.data != ':') {
+			ctx->addr.route = p_strdup(ctx->pool, "INVALID_ROUTE");
+			return -1;
+		}
+		ctx->parser.data++;
+		if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0)
+			return ret;
+	}
+
+	if ((ret = parse_local_part(ctx)) <= 0)
+		return ret;
+	/* the domain is optional here: "<user>" is accepted */
+	if (*ctx->parser.data == '@') {
+		if ((ret = parse_domain(ctx)) <= 0)
+			return ret;
+	}
+
+	if (*ctx->parser.data != '>')
+		return -1;
+	ctx->parser.data++;
+
+	return rfc822_skip_lwsp(&ctx->parser);
+}
+
+/* Try to parse "display-name <addr>" into ctx->addr. Returns -1 if the input
+   doesn't look like a name-addr (no phrase, or no '<' after it); the caller
+   is then expected to retry from the original position. Otherwise returns
+   1 if there is still input left and 0 if the input has ended. */
+static int parse_name_addr(struct message_address_parser_context *ctx)
+{
+	/*
+	   name-addr       = [display-name] angle-addr
+	   display-name    = phrase
+	*/
+	str_truncate(ctx->str, 0);
+	if (rfc822_parse_phrase(&ctx->parser, ctx->str) <= 0 ||
+	    *ctx->parser.data != '<')
+		return -1;
 
-	**next_addr = addr;
-	*next_addr = &addr->next;
+	ctx->addr.name = p_strdup(ctx->pool, str_c(ctx->str));
+	if (parse_angle_addr(ctx) < 0) {
+		/* broken */
+		/* still treated as a name-addr; the domain is replaced
+		   with a marker instead of failing the whole address */
+		ctx->addr.domain = p_strdup(ctx->pool, "SYNTAX_ERROR");
+	}
+	return ctx->parser.data != ctx->parser.end;
+}
+
+/* Parse "local-part@domain" into ctx->addr. If a comment was skipped while
+   parsing, it is used as the address's name. Returns a negative value on
+   error, otherwise the result of the last parsing step. */
+static int parse_addr_spec(struct message_address_parser_context *ctx)
+{
+	/* addr-spec       = local-part "@" domain */
+	string_t *comment = ctx->parser.last_comment;
+	int status;
+
+	/* forget comments seen before this address */
+	str_truncate(comment, 0);
+
+	status = parse_local_part(ctx);
+	if (status > 0 && *ctx->parser.data == '@')
+		status = parse_domain(ctx);
+	if (status < 0)
+		return status;
+
+	if (str_len(comment) > 0)
+		ctx->addr.name = p_strdup(ctx->pool, str_c(comment));
+	return status;
+}
+
+/* Parse one mailbox (name-addr or addr-spec) and append it to the result
+   list. Missing parts are replaced with "MISSING_MAILBOX" / "MISSING_DOMAIN"
+   placeholders. Returns 0 if there was no input left, -1 if the input could
+   not be parsed as either form (nothing is added then), otherwise the result
+   of the successful parse function. */
+static int parse_mailbox(struct message_address_parser_context *ctx)
+{
+	const unsigned char *start;
+	int ret;
+
+	if (ctx->parser.data == ctx->parser.end)
+		return 0;
+
+	/* mailbox         = name-addr / addr-spec */
+	start = ctx->parser.data;
+	if ((ret = parse_name_addr(ctx)) < 0) {
+		/* nope, should be addr-spec */
+		ctx->parser.data = start;
+		if ((ret = parse_addr_spec(ctx)) < 0)
+			return -1;
+	}
+
+	/* the placeholder must go to the field that is actually missing;
+	   writing it to domain would overwrite a successfully parsed domain
+	   and leave mailbox NULL */
+	if (ctx->addr.mailbox == NULL)
+		ctx->addr.mailbox = p_strdup(ctx->pool, "MISSING_MAILBOX");
+	if (ctx->addr.domain == NULL)
+		ctx->addr.domain = p_strdup(ctx->pool, "MISSING_DOMAIN");
+	add_address(ctx);
+
+	return ret;
+}
+
+/* Parse a comma separated list of mailboxes, adding each one to the result
+   list. Stops at the first character that isn't ',' after a mailbox.
+   Returns the result of the last parse_mailbox() call. */
+static int parse_mailbox_list(struct message_address_parser_context *ctx)
+{
+	int ret;
 
-	return addr;
+	/* mailbox-list    = (mailbox *("," mailbox)) / obs-mbox-list */
+	while ((ret = parse_mailbox(ctx)) > 0) {
+		if (*ctx->parser.data != ',')
+			break;
+		ctx->parser.data++;
+		/* result ignored: parse_mailbox() checks for end of input */
+		rfc822_skip_lwsp(&ctx->parser);
+	}
+	return ret;
+}
+
+/* Try to parse "group-name: mailbox, mailbox;". Returns -1 if the input is
+   not a group (no phrase followed by ':'); nothing is added then. Otherwise
+   adds a group start entry (only mailbox set), the group's mailboxes and an
+   all-NULL end-of-group entry, and returns 1. */
+static int parse_group(struct message_address_parser_context *ctx)
+{
+	/*
+	   group           = display-name ":" [mailbox-list / CFWS] ";" [CFWS]
+	   display-name    = phrase
+	*/
+	str_truncate(ctx->str, 0);
+	if (rfc822_parse_phrase(&ctx->parser, ctx->str) <= 0 ||
+	    *ctx->parser.data != ':')
+		return -1;
+
+	/* from now on don't return -1 even if there are problems, so that
+	   the caller knows this is a group */
+	ctx->parser.data++;
+	(void)rfc822_skip_lwsp(&ctx->parser);
+
+	ctx->addr.mailbox = p_strdup(ctx->pool, str_c(ctx->str));
+	add_address(ctx);
+
+	if (parse_mailbox_list(ctx) > 0) {
+		if (*ctx->parser.data == ';') {
+			ctx->parser.data++;
+			(void)rfc822_skip_lwsp(&ctx->parser);
+		}
+	}
+
+	/* A member that failed to parse (e.g. "group: user@") leaves its
+	   partial fields in ctx->addr. The end-of-group entry must be all
+	   NULL, so drop them before adding it. */
+	memset(&ctx->addr, 0, sizeof(ctx->addr));
+	add_address(ctx);
+	return 1;
+}
+
+/* Parse one address: first as a group and, if parse_group() reports that
+   it isn't one, as a mailbox from the same starting position. Returns the
+   result of whichever parser handled the input. */
+static int parse_address(struct message_address_parser_context *ctx)
+{
+	/* address         = mailbox / group */
+	const unsigned char *saved_pos = ctx->parser.data;
+	int result = parse_group(ctx);
+
+	if (result >= 0)
+		return result;
+
+	/* not a group, try mailbox */
+	ctx->parser.data = saved_pos;
+	return parse_mailbox(ctx);
+}
+
+/* Parse comma separated addresses into the result list. At most
+   max_addresses calls to parse_address() are made; a group is handled by a
+   single call even though it adds several list entries. With
+   max_addresses == 0 nothing is parsed. */
+static void parse_address_list(struct message_address_parser_context *ctx,
+			       unsigned int max_addresses)
+{
+	/* address-list    = (address *("," address)) / obs-addr-list */
+	while (max_addresses-- > 0 && parse_address(ctx) > 0) {
+		/* parse_address() returned > 0, so there is input left */
+		if (*ctx->parser.data != ',')
+			break;
+		ctx->parser.data++;
+		if (rfc822_skip_lwsp(&ctx->parser) <= 0)
+			break;
+	}
 }
 
 struct message_address *
 message_address_parse(pool_t pool, const unsigned char *data, size_t size,
 		      unsigned int max_addresses)
 {
-	static const enum message_token stop_tokens_init[] =
-		{ ',', '@', '<', ':', TOKEN_LAST };
-	static const enum message_token stop_tokens_group[] =
-		{ ',', '@', '<', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_domain[] =
-		{ ',', '<', TOKEN_LAST };
-	static const enum message_token stop_tokens_domain_group[] =
-		{ ',', '<', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_post_addr[] =
-		{ ',', TOKEN_LAST };
-	static const enum message_token stop_tokens_post_addr_group[] =
-		{ ',', ';', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_route[] =
-		{ ':', '>', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_mailbox[] =
-		{ '@', '>', TOKEN_LAST };
-	static const enum message_token stop_tokens_addr_domain[] =
-		{ '>', TOKEN_LAST };
-
-	struct message_address *first_addr, **next_addr, *addr;
-	struct message_tokenizer *tok;
-	const enum message_token *stop_tokens;
-	enum message_token token;
-	string_t *mailbox, *domain, *route, *name, *comment, *next_phrase;
-	size_t len;
-	int ingroup, stop;
-
-	if (size == 0)
-		return NULL;
-
-	first_addr = NULL;
-	next_addr = &first_addr;
-
-	/* 1) name <@route:mailbox@domain>, ...
-	   2) mailbox@domain (name), ...
-	   3) group: name <box@domain>, box2@domain2 (name2), ... ;, ...
-
-	   ENVELOPE wants groups to be stored like (NIL, NIL, group, NIL),
-	   ..., (NIL, NIL, NIL, NIL)
-	*/
-	tok = message_tokenize_init(data, size, NULL, NULL);
-	message_tokenize_skip_comments(tok, FALSE);
-        message_tokenize_dot_token(tok, FALSE);
+	struct message_address_parser_context ctx;
 
-	if (!pool->datastack_pool)
-		t_push();
-	mailbox = t_str_new(128);
-	domain = t_str_new(256);
-	route = t_str_new(128);
-	name = t_str_new(256);
-	comment = t_str_new(256);
-
-	ingroup = FALSE; len = 0;
-	stop_tokens = stop_tokens_init;
-
-	if (max_addresses == 0)
-		max_addresses = (unsigned int)-1;
-
-	next_phrase = mailbox; stop = FALSE;
-	while (!stop && max_addresses > 0) {
-		if (next_phrase == name && str_len(name) > 0) {
-			/* continuing previously started name,
-			   separate it from us with space */
-			str_append_c(name, ' ');
-			len = str_len(name);
-		} else {
-			len = 0;
-		}
-		message_tokenize_get_string(tok, next_phrase, comment,
-					    stop_tokens);
-
-		if (next_phrase == name && len > 0 && len == str_len(name)) {
-			/* nothing appeneded, remove the space */
-			str_truncate(name, len-1);
-		}
-
-		token = message_tokenize_get(tok);
-		switch (token) {
-		case TOKEN_LAST:
-		case ',':
-		case ';':
-			/* end of address */
-			if (str_len(mailbox) > 0 || str_len(domain) > 0 ||
-			    str_len(route) > 0 || str_len(name) > 0) {
-				addr = new_address(pool, &next_addr);
-				max_addresses--;
-				addr->mailbox = p_strdup(pool, str_c(mailbox));
-				addr->domain = str_len(domain) == 0 ? NULL :
-					p_strdup(pool, str_c(domain));
-				addr->route = str_len(route) == 0 ? NULL :
-					p_strdup(pool, str_c(route));
-				addr->name = next_phrase == name ?
-					p_strdup_empty(pool, str_c(name)) :
-					p_strdup_empty(pool, str_c(comment));
-			}
+	t_push();
+	memset(&ctx, 0, sizeof(ctx));
 
-			if (ingroup && token == ';') {
-				/* end of group - add end of group marker */
-				ingroup = FALSE;
-				(void)new_address(pool, &next_addr);
-				max_addresses--;
-			}
-
-			if (token == TOKEN_LAST) {
-				stop = TRUE;
-				break;
-			}
-
-			stop_tokens = ingroup ? stop_tokens_group :
-				stop_tokens_init;
-
-			str_truncate(mailbox, 0);
-			str_truncate(domain, 0);
-			str_truncate(route, 0);
-			str_truncate(name, 0);
-			str_truncate(comment, 0);
-
-			next_phrase = mailbox;
-			break;
-		case '@':
-			/* domain part comes next */
-			next_phrase = domain;
-			stop_tokens = ingroup ? stop_tokens_domain_group :
-				stop_tokens_domain;
-			break;
-		case '<':
-			/* route-addr */
-
-			/* mailbox/domain name so far has actually
-			   been the real name */
-			str_append_str(name, mailbox);
-			str_truncate(mailbox, 0);
-
-			if (str_len(domain) > 0) {
-                                str_append_c(name, '@');
-				str_append_str(name, domain);
-				str_truncate(domain, 0);
-			}
-
-			/* mailbox */
-			message_tokenize_get_string(tok, mailbox, NULL,
-						    stop_tokens_addr_mailbox);
+	rfc822_parser_init(&ctx.parser, data, size, t_str_new(128));
+	ctx.pool = pool;
+	ctx.str = t_str_new(128);
 
-			if (message_tokenize_get(tok) == '@' &&
-			    str_len(mailbox) == 0) {
-				/* route is given */
-				message_tokenize_get_string(tok,
-					route, NULL, stop_tokens_addr_route);
-
-				if (message_tokenize_get(tok) == ':') {
-					/* mailbox comes next */
-					message_tokenize_get_string(tok,
-						mailbox, NULL,
-						stop_tokens_addr_mailbox);
-				}
-			}
-
-			if (message_tokenize_get(tok) == '@') {
-				/* domain */
-				message_tokenize_get_string(tok,
-					domain, NULL, stop_tokens_addr_domain);
-			}
-
-			token = message_tokenize_get(tok);
-			i_assert(token == '>' || token == TOKEN_LAST);
-
-			next_phrase = name;
-			stop_tokens = ingroup ? stop_tokens_post_addr_group :
-				stop_tokens_post_addr;
-			break;
-		case ':':
-			/* beginning of group */
-			addr = new_address(pool, &next_addr);
-			max_addresses--;
-			addr->mailbox = p_strdup(pool, str_c(mailbox));
-
-			str_truncate(mailbox, 0);
-			str_truncate(comment, 0);
-
-			ingroup = TRUE;
-			stop_tokens = stop_tokens_group;
-			break;
-		default:
-			i_unreached();
-			break;
-		}
-	}
-
-	if (ingroup)
-		(void)new_address(pool, &next_addr);
-
-	if (!pool->datastack_pool)
-		t_pop();
-	message_tokenize_deinit(tok);
-
-	return first_addr;
+	(void)parse_address_list(&ctx, max_addresses);
+	t_pop();
+	return ctx.first_addr;
 }
 
 void message_address_write(string_t *str, const struct message_address *addr)
@@ -234,14 +295,14 @@
 		else
 			str_append(str, ", ");
 
-		if (addr->mailbox == NULL && addr->domain == NULL) {
+		if (addr->domain == NULL) {
 			if (!in_group) {
-				if (addr->name != NULL)
-					str_append(str, addr->name);
+				if (addr->mailbox != NULL)
+					str_append(str, addr->mailbox);
 				str_append(str, ": ");
 				first = TRUE;
 			} else {
-				i_assert(addr->name == NULL);
+				i_assert(addr->mailbox == NULL);
 
 				/* cut out the ", " */
 				str_truncate(str, str_len(str)-2);
--- a/src/lib-mail/message-address.h	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/lib-mail/message-address.h	Thu Jan 06 20:14:28 2005 +0200
@@ -2,7 +2,7 @@
 #define __MESSAGE_ADDRESS_H
 
 /* group: ... ; will be stored like:
-   {name = "group", NULL, NULL, NULL}, ..., {NULL, NULL, NULL, NULL}
+   {name = NULL, NULL, "group", NULL}, ..., {NULL, NULL, NULL, NULL}
 */
 struct message_address {
 	struct message_address *next;
@@ -10,9 +10,7 @@
 	const char *name, *route, *mailbox, *domain;
 };
 
-/* data and size are passed directly to message_tokenize_init(), so (size_t)-1
-   can be given if data is \0 terminated. If there's more than max_addresses,
-   the rest are skipped. Setting max_addresses to 0 disables this. */
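+/* Parse message addresses from given data. size is the exact length of data
+   in bytes. Parsing stops after max_addresses top-level addresses (a group
+   counts as one); 0 parses nothing, use (unsigned int)-1 for no limit. */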
 struct message_address *
 message_address_parse(pool_t pool, const unsigned char *data, size_t size,
 		      unsigned int max_addresses);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/rfc822-parser.c	Thu Jan 06 20:14:28 2005 +0200
@@ -0,0 +1,275 @@
+/* Copyright (C) 2005 Timo Sirainen */
+
+#include "lib.h"
+#include "str.h"
+#include "rfc822-parser.h"
+
+/*
+   atext        =       ALPHA / DIGIT / ; Any character except controls,
+                        "!" / "#" /     ;  SP, and specials.
+                        "$" / "%" /     ;  Used for atoms
+                        "&" / "'" /
+                        "*" / "+" /
+                        "-" / "/" /
+                        "=" / "?" /
+                        "^" / "_" /
+                        "`" / "{" /
+                        "|" / "}" /
+                        "~"
+*/
+
+/* atext chars are marked with 1, alpha and digits with 2 */
+/* Lookup table indexed by byte value. 0 means the byte is not allowed in an
+   atom. Bytes 128-255 are all marked with 2, so 8-bit characters are
+   accepted as atom text. IS_ATEXT() only tests for non-zero, it doesn't
+   distinguish 1 from 2. */
+static unsigned char atext_chars[256] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
+	0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, /* 32-47 */
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 0, 1, /* 48-63 */
+	0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
+	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */
+
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+/* TRUE if c may appear in an atom. The cast through unsigned char keeps
+   the index non-negative even when c is a negative char value. */
+#define IS_ATEXT(c) \
+	(atext_chars[(int)(unsigned char)(c)] != 0)
+
+/* Set up ctx to parse data[0..size-1]. If last_comment is non-NULL, the
+   text of the most recently skipped comment is stored into it. */
+void rfc822_parser_init(struct rfc822_parser_context *ctx,
+			const unsigned char *data, size_t size,
+			string_t *last_comment)
+{
+	const unsigned char *data_end = data + size;
+
+	/* clear first so that any other field starts out zeroed */
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->last_comment = last_comment;
+	ctx->end = data_end;
+	ctx->data = data;
+}
+
+/* Skip over a "(comment)". The cursor must be on the opening '('. Nested
+   parentheses are tracked with a level counter. If ctx->last_comment is set,
+   it's replaced with the comment's text, with the backslashes of escaped
+   characters left out. Returns -1 if the comment isn't terminated (or the
+   input ends right after a backslash), otherwise 1 if there is input left
+   after the closing ')' and 0 if not. */
+int rfc822_skip_comment(struct rfc822_parser_context *ctx)
+{
+	const unsigned char *start;
+	int level = 1;
+
+	i_assert(*ctx->data == '(');
+
+	if (ctx->last_comment != NULL)
+		str_truncate(ctx->last_comment, 0);
+
+	/* start points to the beginning of the text that hasn't been
+	   copied to last_comment yet */
+	start = ++ctx->data;
+	for (; ctx->data != ctx->end; ctx->data++) {
+		switch (*ctx->data) {
+		case '(':
+			level++;
+			break;
+		case ')':
+			if (--level == 0) {
+				/* outermost comment closed */
+				if (ctx->last_comment != NULL) {
+					str_append_n(ctx->last_comment, start,
+						     ctx->data - start);
+				}
+				ctx->data++;
+				return ctx->data != ctx->end;
+			}
+			break;
+		case '\\':
+			/* copy the text before the backslash and restart
+			   the pending text from the escaped character */
+			if (ctx->last_comment != NULL) {
+				str_append_n(ctx->last_comment, start,
+					     ctx->data - start);
+			}
+			start = ctx->data + 1;
+
+			/* together with the loop's increment this steps
+			   over the escaped character */
+			ctx->data++;
+			if (ctx->data == ctx->end)
+				return -1;
+			break;
+		}
+	}
+
+	/* missing ')' */
+	return -1;
+}
+
+/* Skip spaces, tabs, CRs, LFs and comments. Stops at the first other
+   character or at a comment that can't be skipped. Returns 1 if there is
+   input left, 0 if the end was reached. */
+int rfc822_skip_lwsp(struct rfc822_parser_context *ctx)
+{
+	while (ctx->data != ctx->end) {
+		switch (*ctx->data) {
+		case ' ':
+		case '\t':
+		case '\r':
+		case '\n':
+			ctx->data++;
+			continue;
+		case '(':
+			if (rfc822_skip_comment(ctx) >= 0)
+				continue;
+			/* broken comment, stop here */
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+	return ctx->data != ctx->end;
+}
+
+/* Append the run of atext characters at the cursor to str (possibly nothing)
+   and skip the whitespace after it. Returns 0 if the input ended with the
+   atom, otherwise rfc822_skip_lwsp()'s result. */
+int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str)
+{
+	const unsigned char *atom_start = ctx->data;
+
+	/*
+	   atom            = [CFWS] 1*atext [CFWS]
+	   atext           =
+	     ; Any character except controls, SP, and specials.
+	*/
+	while (ctx->data != ctx->end && IS_ATEXT(*ctx->data))
+		ctx->data++;
+
+	str_append_n(str, atom_start, ctx->data - atom_start);
+	if (ctx->data == ctx->end)
+		return 0;
+	return rfc822_skip_lwsp(ctx);
+}
+
+/* Append a dot-atom ("a.b.c") at the cursor to str. Whitespace and comments
+   around the dots are skipped and not copied. Returns 0 if the end of input
+   was reached, 1 if parsing stopped at a character that is neither atext
+   nor '.'. */
+int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str)
+{
+	const unsigned char *start;
+	int ret;
+
+	/*
+	   dot-atom        = [CFWS] dot-atom-text [CFWS]
+	   dot-atom-text   = 1*atext *("." 1*atext)
+
+	   atext           =
+	     ; Any character except controls, SP, and specials.
+
+	   For RFC-822 compatibility allow LWSP around '.'
+	*/
+	for (start = ctx->data; ctx->data != ctx->end; ctx->data++) {
+		if (IS_ATEXT(*ctx->data))
+			continue;
+
+		/* end of this atom, copy it */
+		str_append_n(str, start, ctx->data - start);
+
+		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
+			return ret;
+
+		if (*ctx->data != '.')
+			return 1;
+
+		ctx->data++;
+		str_append_c(str, '.');
+
+		if ((ret = rfc822_skip_lwsp(ctx)) <= 0)
+			return ret;
+		/* NOTE(review): the loop's increment runs next, so the
+		   first character after the '.' is included in the output
+		   without being checked with IS_ATEXT() */
+		start = ctx->data;
+	}
+
+	/* input ended inside an atom */
+	str_append_n(str, start, ctx->data - start);
+	return 0;
+}
+
+/* Append the contents of a "quoted string" at the cursor to str, without
+   the surrounding quotes and without the backslashes of quoted-pairs. The
+   cursor must be on the opening '"'. Returns -1 if the closing '"' is
+   missing or the input ends right after a backslash, otherwise
+   rfc822_skip_lwsp()'s result for the input after the closing quote. */
+int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str)
+{
+	const unsigned char *start;
+
+	i_assert(*ctx->data == '"');
+	ctx->data++;
+
+	for (start = ctx->data; ctx->data != ctx->end; ctx->data++) {
+		if (*ctx->data == '"') {
+			str_append_n(str, start, ctx->data - start);
+			/* step over the closing quote, otherwise the caller
+			   would see it as the start of a new quoted string */
+			ctx->data++;
+			return rfc822_skip_lwsp(ctx);
+		}
+
+		if (*ctx->data != '\\')
+			continue;
+
+		ctx->data++;
+		if (ctx->data == ctx->end)
+			return -1;
+
+		/* copy the text before the backslash, but not the backslash
+		   itself. The escaped character becomes the start of the
+		   pending text and the loop's increment steps over it, so
+		   an escaped '"' doesn't end the string. */
+		str_append_n(str, start, ctx->data - start - 1);
+		start = ctx->data;
+	}
+
+	/* missing '"' */
+	return -1;
+}
+
+/* Append a phrase (atoms and quoted strings) at the cursor to str, with
+   the words separated by single spaces. Returns 0 if there is no input or
+   the input ends during the phrase, a negative value if a quoted string is
+   broken, otherwise rfc822_skip_lwsp()'s result after the last word. */
+int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str)
+{
+	int ret;
+
+	/* the loop below looks at *ctx->data right away, which must not be
+	   done when there is no input left */
+	if (ctx->data == ctx->end)
+		return 0;
+
+	for (;;) {
+		if (*ctx->data == '"')
+			ret = rfc822_parse_quoted_string(ctx, str);
+		else
+			ret = rfc822_parse_atom(ctx, str);
+		if (ret <= 0)
+			return ret;
+
+		/* ret > 0 means there is input left, so *ctx->data is
+		   valid here */
+		if (!IS_ATEXT(*ctx->data) && *ctx->data != '"')
+			break;
+		str_append_c(str, ' ');
+	}
+	return rfc822_skip_lwsp(ctx);
+}
+
+/* Append a "[domain-literal]" at the cursor to str, including the brackets
+   and any backslashes. The cursor must be on '['. Returns -1 if the closing
+   ']' is missing, otherwise 1 if there is input left after it and 0 if
+   not. */
+static int
+rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str)
+{
+	const unsigned char *literal_start;
+	unsigned char chr;
+
+	/*
+	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
+	   dcontent        = dtext / quoted-pair
+	   dtext           = NO-WS-CTL /     ; Non white space controls
+			     %d33-90 /       ; The rest of the US-ASCII
+			     %d94-126        ;  characters not including "[",
+					     ;  "]", or "\"
+	*/
+	i_assert(*ctx->data == '[');
+
+	literal_start = ctx->data;
+	while (ctx->data != ctx->end) {
+		chr = *ctx->data++;
+		if (chr == ']') {
+			str_append_n(str, literal_start,
+				     ctx->data - literal_start);
+			return ctx->data != ctx->end;
+		}
+		if (chr == '\\') {
+			if (ctx->data == ctx->end)
+				break;
+			/* step over the escaped character */
+			ctx->data++;
+		}
+	}
+
+	/* missing ']' */
+	return -1;
+}
+
+/* Append the domain after '@' to str, either as a dot-atom or as a
+   "[literal]". The cursor must be on '@'. Returns -1 if nothing follows
+   the '@' or the domain is broken, otherwise 1 if there is input left and
+   0 if not. */
+int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str)
+{
+	int result;
+
+	/*
+	   domain          = dot-atom / domain-literal / obs-domain
+	   domain-literal  = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
+	   obs-domain      = atom *("." atom)
+	*/
+	i_assert(*ctx->data == '@');
+	ctx->data++;
+
+	if (rfc822_skip_lwsp(ctx) <= 0)
+		return -1;
+
+	result = *ctx->data == '[' ?
+		rfc822_parse_domain_literal(ctx, str) :
+		rfc822_parse_dot_atom(ctx, str);
+	if (result < 0)
+		return -1;
+
+	return ctx->data != ctx->end;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-mail/rfc822-parser.h	Thu Jan 06 20:14:28 2005 +0200
@@ -0,0 +1,22 @@
+#ifndef __RFC822_PARSER_H
+#define __RFC822_PARSER_H
+
+/* Parser state: a cursor into the input and an optional comment buffer. */
+struct rfc822_parser_context {
+	/* current read position */
+	const unsigned char *data;
+	/* one past the last input byte */
+	const unsigned char *end;
+	/* if non-NULL, receives the text of the last skipped comment */
+	string_t *last_comment;
+};
+
+/* Initialize ctx to parse data[0..size-1]. last_comment may be NULL. */
+void rfc822_parser_init(struct rfc822_parser_context *ctx,
+			const unsigned char *data, size_t size,
+			string_t *last_comment);
+
+/* In the functions below a negative return value means a syntax error,
+   0 means that the end of input was reached and a positive value means
+   that there is still input left. The parse functions append to str. */
+
+/* The cursor must be on '('. */
+int rfc822_skip_comment(struct rfc822_parser_context *ctx);
+/* Skips whitespace and comments. */
+int rfc822_skip_lwsp(struct rfc822_parser_context *ctx);
+int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str);
+int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str);
+/* The cursor must be on the opening '"'. */
+int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx,
+			       string_t *str);
+int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str);
+/* The cursor must be on '@'. */
+int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str);
+
+#endif
--- a/src/lib-storage/index/index-search.c	Thu Jan 06 20:09:08 2005 +0200
+++ b/src/lib-storage/index/index-search.c	Thu Jan 06 20:14:28 2005 +0200
@@ -367,7 +367,7 @@
 			addr = message_address_parse(pool_datastack_create(),
 						     ctx->hdr->full_value,
 						     ctx->hdr->full_value_len,
-						     0);
+						     (unsigned int)-1);
 			str = t_str_new(ctx->hdr->value_len);
 			message_address_write(str, addr);
 			ret = message_header_search(str_data(str), str_len(str),