Mercurial > dovecot > core-2.2

--- a/src/imap/cmd-list.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/imap/cmd-list.c	Fri Jan 03 17:57:12 2003 +0200
@@ -1,6 +1,7 @@
 /* Copyright (C) 2002 Timo Sirainen */

 #include "common.h"
+#include "strescape.h"
 #include "commands.h"
 #include "imap-match.h"

@@ -48,7 +49,7 @@

 		/* escaping is done here to make sure we don't try to escape
 		   the separator char */
-		name = imap_escape(t_strdup_until(name, path));
+		name = str_escape(t_strdup_until(name, path));

 		/* find the node */
 		while (*node != NULL) {
--- a/src/lib-imap/imap-bodystructure.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-imap/imap-bodystructure.c	Fri Jan 03 17:57:12 2003 +0200
@@ -33,92 +33,95 @@
 static void part_write_bodystructure(MessagePart *part, String *str,
 				     int extended);

-static void parse_content_type(const Rfc822Token *tokens,
-			       int count, void *context)
+static void parse_content_type(const char *value, size_t value_len,
+			       void *context)
 {
         MessagePartBodyData *data = context;
-	const char *value;
-	int i;
+	size_t i;

-	/* find the content type separator */
-	for (i = 0; i < count; i++) {
-		if (tokens[i].token == '/')
+	for (i = 0; i < value_len; i++) {
+		if (value[i] == '/')
 			break;
 	}

-	value = rfc822_tokens_get_value_quoted(tokens, i);
-	data->content_type = p_strdup(data->pool, value);
+	if (i == value_len) {
+		data->content_type =
+                        imap_quote_value(data->pool, value, value_len);
+	} else {
+		data->content_type =
+                        imap_quote_value(data->pool, value, i);

-	value = rfc822_tokens_get_value_quoted(tokens+i+1, count-i-1);
-	data->content_subtype = p_strdup(data->pool, value);
+		i++;
+		data->content_subtype =
+                        imap_quote_value(data->pool, value+i, value_len-i);
+	}
 }

-static void parse_save_params_list(const Rfc822Token *name,
-				   const Rfc822Token *value, int value_count,
+static void parse_save_params_list(const char *name, size_t name_len,
+				   const char *value, size_t value_len,
+				   int value_quoted __attr_unused__,
 				   void *context)
 {
         MessagePartBodyData *data = context;
-	const char *str;

 	if (str_len(data->str) != 0)
 		str_append_c(data->str, ' ');

 	str_append_c(data->str, '"');
-	str_append_n(data->str, name->ptr, name->len);
+	str_append_n(data->str, name, name_len);
 	str_append(data->str, "\" ");

-        str = rfc822_tokens_get_value_quoted(value, value_count);
-	str_append(data->str, str);
+	str_append_c(data->str, '"');
+	str_append_n(data->str, value, value_len);
+	str_append_c(data->str, '"');
 }

-static void parse_content_transfer_encoding(const Rfc822Token *tokens,
-					    int count, void *context)
+static void parse_content_transfer_encoding(const char *value, size_t value_len,
+					    void *context)
 {
         MessagePartBodyData *data = context;
-	const char *value;

-	value = rfc822_tokens_get_value_quoted(tokens, count);
-	data->content_transfer_encoding = p_strdup(data->pool, value);
+	data->content_transfer_encoding =
+		imap_quote_value(data->pool, value, value_len);
 }

-static void parse_content_disposition(const Rfc822Token *tokens,
-				      int count, void *context)
+static void parse_content_disposition(const char *value, size_t value_len,
+				      void *context)
 {
         MessagePartBodyData *data = context;
-	const char *value;

-	value = rfc822_tokens_get_value_quoted(tokens, count);
-	data->content_disposition = p_strdup(data->pool, value);
+	data->content_disposition =
+		imap_quote_value(data->pool, value, value_len);
 }

-static void parse_content_language(const Rfc822Token *tokens,
-				   int count, void *context)
+static void parse_content_language(const char *value, size_t value_len,
+				   MessagePartBodyData *data)
 {
-        MessagePartBodyData *data = context;
+	Rfc822TokenizeContext *ctx;
+        Rfc822Token token;
 	String *str;
 	int quoted;

 	/* Content-Language: en-US, az-arabic (comments allowed) */

-	if (count <= 0)
-		return;
+	ctx = rfc822_tokenize_init(value, value_len, NULL, NULL);

+	t_push();
 	str = t_str_new(256);

 	quoted = FALSE;
-	for (; count > 0; count--, tokens++) {
-		switch (tokens->token) {
-		case '(':
-			/* ignore comment */
+	while (rfc822_tokenize_next(ctx)) {
+		token = rfc822_tokenize_get(ctx);
+		if (token == TOKEN_LAST)
 			break;
-		case ',':
+
+		if (token == ',') {
 			/* list separator */
 			if (quoted) {
 				str_append_c(str, '"');
 				quoted = FALSE;
 			}
-			break;
-		default:
+		} else {
 			/* anything else goes as-is. only alphabetic characters
 			   and '-' is allowed, so anything else is error
 			   which we can deal with however we want. */
@@ -129,11 +132,13 @@
 				quoted = TRUE;
 			}

-			if (IS_TOKEN_STRING(tokens->token))
-				str_append_n(str, tokens->ptr, tokens->len);
-			else
-				str_append_c(str, tokens->token);
-			break;
+			if (!IS_TOKEN_STRING(token))
+				str_append_c(str, token);
+			else {
+				value = rfc822_tokenize_get_value(ctx,
+								  &value_len);
+				str_append_n(str, value, value_len);
+			}
 		}
 	}

@@ -141,6 +146,10 @@
 		str_append_c(str, '"');

 	data->content_language = p_strdup(data->pool, str_c(str));
+
+	t_pop();
+
+	rfc822_tokenize_deinit(ctx);
 }

 static void parse_header(MessagePart *part,
@@ -174,17 +183,16 @@
 	if (strcasecmp(name, "Content-Type") == 0 &&
 	    part_data->content_type == NULL) {
 		part_data->str = t_str_new(256);
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_type,
-						   parse_save_params_list,
-						   part_data);
+		message_content_parse_header(value, value_len,
+					     parse_content_type,
+					     parse_save_params_list, part_data);
 		part_data->content_type_params =
 			p_strdup_empty(pool, str_c(part_data->str));
 	} else if (strcasecmp(name, "Content-Transfer-Encoding") == 0 &&
 		   part_data->content_transfer_encoding == NULL) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						parse_content_transfer_encoding,
-						NULL, part_data);
+		message_content_parse_header(value, value_len,
+					     parse_content_transfer_encoding,
+					     NULL, part_data);
 	} else if (strcasecmp(name, "Content-ID") == 0 &&
 		   part_data->content_id == NULL) {
 		part_data->content_id =
@@ -196,16 +204,13 @@
 	} else if (strcasecmp(name, "Content-Disposition") == 0 &&
 		   part_data->content_disposition_params == NULL) {
 		part_data->str = t_str_new(256);
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_disposition,
-						   parse_save_params_list,
-						   part_data);
+		message_content_parse_header(value, value_len,
+					     parse_content_disposition,
+					     parse_save_params_list, part_data);
 		part_data->content_disposition_params =
 			p_strdup_empty(pool, str_c(part_data->str));
 	} else if (strcasecmp(name, "Content-Language") == 0) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_language, NULL,
-						   part_data);
+		parse_content_language(value, value_len, part_data);
 	} else if (strcasecmp(name, "Content-MD5") == 0 &&
 		   part_data->content_md5 == NULL) {
 		part_data->content_md5 =
@@ -262,7 +267,7 @@
 	if (data->content_subtype != NULL)
 		str_append(str, data->content_subtype);
 	else
-		str_append(str, "x-unknown");
+		str_append(str, "\"x-unknown\"");

 	if (!extended)
 		return;
--- a/src/lib-imap/imap-parser.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-imap/imap-parser.c	Fri Jan 03 17:57:12 2003 +0200
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "istream.h"
 #include "ostream.h"
+#include "strescape.h"
 #include "imap-parser.h"

 #define is_linebreak(c) \
@@ -215,8 +216,8 @@
 		if (parser->str_first_escape >= 0 &&
 		    (parser->flags & IMAP_PARSE_FLAG_NO_UNESCAPE) == 0) {
 			/* -1 because we skipped the '"' prefix */
-			str_remove_escapes(arg->_data.str +
-					   parser->str_first_escape-1);
+			str_unescape(arg->_data.str +
+				     parser->str_first_escape-1);
 		}
 		break;
 	case ARG_PARSE_LITERAL_DATA:
--- a/src/lib-imap/imap-util.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-imap/imap-util.c	Fri Jan 03 17:57:12 2003 +0200
@@ -47,29 +47,3 @@

 	return str_c(str);
 }
-
-const char *imap_escape(const char *str)
-{
-	char *ret, *p;
-	size_t i, esc;
-
-	/* get length of string and number of chars to escape */
-	esc = 0;
-	for (i = 0; str[i] != '\0'; i++) {
-		if (IS_ESCAPED_CHAR(str[i]))
-			esc++;
-	}
-
-	if (esc == 0)
-		return str;
-
-	/* @UNSAFE: escape them */
-	p = ret = t_malloc(i + esc + 1);
-	for (; *str != '\0'; str++) {
-		if (IS_ESCAPED_CHAR(*str))
-			*p++ = '\\';
-		*p++ = *str;
-	}
-	*p = '\0';
-	return ret;
-}
--- a/src/lib-imap/imap-util.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-imap/imap-util.h	Fri Jan 03 17:57:12 2003 +0200
@@ -28,14 +28,9 @@
 	MAIL_FLAGS_COUNT	= 32
 };

-#define IS_ESCAPED_CHAR(c) ((c) == '"' || (c) == '\\')
-
 /* Return flags as a space separated string. custom_flags[] is a list of
    names for custom flags, flags having NULL or "" entry are ignored. */
 const char *imap_write_flags(MailFlags flags, const char *custom_flags[],
 			     unsigned int custom_flags_count);

-/* Escape the string */
-const char *imap_escape(const char *str);
-
 #endif
--- a/src/lib-index/mail-index-update.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-index/mail-index-update.c	Fri Jan 03 17:57:12 2003 +0200
@@ -5,7 +5,6 @@
 #include "istream.h"
 #include "ioloop.h"
 #include "rfc822-date.h"
-#include "rfc822-tokenize.h"
 #include "message-parser.h"
 #include "message-part-serialize.h"
 #include "message-size.h"
--- a/src/lib-index/mbox/mbox-index.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-index/mbox/mbox-index.c	Fri Jan 03 17:57:12 2003 +0200
@@ -3,7 +3,6 @@
 #include "lib.h"
 #include "buffer.h"
 #include "istream.h"
-#include "rfc822-tokenize.h"
 #include "mbox-index.h"
 #include "mbox-lock.h"
 #include "mail-index-util.h"
--- a/src/lib-mail/message-body-search.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-body-search.c	Fri Jan 03 17:57:12 2003 +0200
@@ -4,8 +4,8 @@
 #include "base64.h"
 #include "buffer.h"
 #include "istream.h"
+#include "strescape.h"
 #include "charset-utf8.h"
-#include "rfc822-tokenize.h"
 #include "quoted-printable.h"
 #include "message-parser.h"
 #include "message-content-parser.h"
@@ -45,57 +45,51 @@
 	unsigned int found:1;
 } PartSearchContext;

-static void parse_content_type(const Rfc822Token *tokens, int count,
+static void parse_content_type(const char *value, size_t value_len,
 			       void *context)
 {
 	PartSearchContext *ctx = context;

-	if (ctx->content_type != NULL && tokens[0].token == 'A') {
-		ctx->content_type =
-			i_strdup(rfc822_tokens_get_value(tokens, count));
+	if (ctx->content_type != NULL) {
+		ctx->content_type = i_strndup(value, value_len);
 		ctx->content_type_text =
 			strncasecmp(ctx->content_type, "text/", 5) == 0 ||
 			strncasecmp(ctx->content_type, "message/", 8) == 0;
 	}
 }

-static void parse_content_type_param(const Rfc822Token *name,
-				     const Rfc822Token *value,
-				     int value_count, void *context)
+static void parse_content_type_param(const char *name, size_t name_len,
+				     const char *value, size_t value_len,
+				     int value_quoted, void *context)
 {
 	PartSearchContext *ctx = context;

-	if (name->len != 7 || strncasecmp(name->ptr, "charset", 7) != 0)
-		return;
-
-	if (ctx->content_charset == NULL) {
-		ctx->content_charset =
-			i_strdup(rfc822_tokens_get_value(value, value_count));
+	if (name_len == 7 && strncasecmp(name, "charset", 7) == 0 &&
+	    ctx->content_charset == NULL) {
+		ctx->content_charset = i_strndup(value, value_len);
+		if (value_quoted) str_unescape(ctx->content_charset);
 	}
 }

-static void parse_content_encoding(const Rfc822Token *tokens,
-				   int count __attr_unused__, void *context)
+static void parse_content_encoding(const char *value, size_t value_len,
+				   void *context)
 {
 	PartSearchContext *ctx = context;

-	if (tokens[0].token != 'A')
-		return;
-
-	switch (tokens[0].len) {
+	switch (value_len) {
 	case 4:
-		if (strncasecmp(tokens[0].ptr, "7bit", 4) != 0 &&
-		    strncasecmp(tokens[0].ptr, "8bit", 4) != 0)
+		if (strncasecmp(value, "7bit", 4) != 0 &&
+		    strncasecmp(value, "8bit", 4) != 0)
 			ctx->content_unknown = TRUE;
 		break;
 	case 6:
-		if (strncasecmp(tokens[0].ptr, "base64", 6) == 0)
+		if (strncasecmp(value, "base64", 6) == 0)
 			ctx->content_base64 = TRUE;
-		else if (strncasecmp(tokens[0].ptr, "binary", 6) != 0)
+		else if (strncasecmp(value, "binary", 6) != 0)
 			ctx->content_unknown = TRUE;
 		break;
 	case 16:
-		if (strncasecmp(tokens[0].ptr, "quoted-printable", 16) == 0)
+		if (strncasecmp(value, "quoted-printable", 16) == 0)
 			ctx->content_qp = TRUE;
 		else
 			ctx->content_unknown = TRUE;
@@ -120,21 +114,17 @@
 						   ctx->hdr_search_ctx);
 	}

-	t_push();
-
 	if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_type,
-						   parse_content_type_param,
-						   ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_type,
+					     parse_content_type_param,
+					     ctx);
 	} else if (name_len == 25 &&
 		   strncasecmp(name, "Content-Transfer-Encoding", 25) == 0) {
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_encoding,
-						   NULL, ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_encoding,
+					     NULL, ctx);
 	}
-
-	t_pop();
 }

 static int message_search_header(PartSearchContext *ctx, IStream *input)
--- a/src/lib-mail/message-content-parser.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-content-parser.c	Fri Jan 03 17:57:12 2003 +0200
@@ -1,52 +1,61 @@
 /* Copyright (C) 2002 Timo Sirainen */

 #include "lib.h"
+#include "str.h"
 #include "rfc822-tokenize.h"
 #include "message-content-parser.h"

-int message_content_parse_header(const char *value, ParseContentFunc func,
-				 ParseContentParamFunc param_func,
-				 void *context)
+void message_content_parse_header(const char *data, size_t size,
+				  ParseContentFunc func,
+				  ParseContentParamFunc param_func,
+				  void *context)
 {
-	const Rfc822Token *tokens;
-	int i, next, ntokens;
+	static const Rfc822Token stop_tokens[] = { ';', TOKEN_LAST };
+	Rfc822TokenizeContext *ctx;
+	Rfc822Token token;
+	String *str;
+	const char *key, *value;
+	size_t key_len, value_len;

-	tokens = rfc822_tokenize(value, &ntokens, NULL, NULL);
-	if (tokens == NULL) {
-		/* error */
-		return FALSE;
-	}
+	ctx = rfc822_tokenize_init(data, size, NULL, NULL);
+        rfc822_tokenize_dot_token(ctx, FALSE);

-	/* first ';' separates the parameters */
-	for (i = 0; i < ntokens; i++) {
-		if (tokens[i].token == ';')
-			break;
-	}
+	t_push();
+	str = t_str_new(256);
+
+        /* first ';' separates the parameters */
+	(void)rfc822_tokenize_get_string(ctx, str, NULL, stop_tokens);

 	if (func != NULL)
-		func(tokens, i, context);
+		func(str_c(str), str_len(str), context);

-	if (param_func != NULL) {
+	t_pop();
+
+	if (param_func != NULL && rfc822_tokenize_get(ctx) == ';') {
 		/* parse the parameters */
-		i++;
-		while (i < ntokens) {
-			/* find the next ';' */
-			for (next = i; next < ntokens; next++) {
-				if (tokens[next].token == ';')
-					break;
-			}
+		while (rfc822_tokenize_next(ctx)) {
+			token = rfc822_tokenize_get(ctx);
+
+			/* <token> "=" <token> | <quoted-string> */
+			if (token != TOKEN_ATOM)
+				continue;
+
+			key = rfc822_tokenize_get_value(ctx, &key_len);

-			if (i+2 < next &&
-			    tokens[i].token == 'A' &&
-			    tokens[i+1].token == '=') {
-				/* <atom> = <value> */
-				param_func(tokens + i, tokens + i + 2,
-					   next - (i+2), context);
-			}
+			(void)rfc822_tokenize_next(ctx);
+			if (rfc822_tokenize_get(ctx) != '=')
+				continue;

-                        i = next+1;
+			(void)rfc822_tokenize_next(ctx);
+			token = rfc822_tokenize_get(ctx);
+			if (token != TOKEN_ATOM && token != TOKEN_QSTRING)
+				continue;
+
+			value = rfc822_tokenize_get_value(ctx, &value_len);
+			param_func(key, key_len, value, value_len,
+				   token == TOKEN_QSTRING, context);
 		}
 	}

-	return TRUE;
+	rfc822_tokenize_deinit(ctx);
 }
--- a/src/lib-mail/message-content-parser.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-content-parser.h	Fri Jan 03 17:57:12 2003 +0200
@@ -1,20 +1,16 @@
 #ifndef __MESSAGE_CONTENT_PARSER_H
 #define __MESSAGE_CONTENT_PARSER_H

-/* functions can safely store data into data stack,
-   ie. message_content_parse_header() is guaranteed not to call
-   t_push()/t_pop() */
-
-/* Note that count can be 0 */
-typedef void (*ParseContentFunc)(const Rfc822Token *tokens, int count,
+/* NOTE: name and value aren't \0-terminated. */
+typedef void (*ParseContentFunc)(const char *value, size_t value_len,
 				 void *context);
-/* name is always atom, value_count is always > 0 */
-typedef void (*ParseContentParamFunc)(const Rfc822Token *name,
-				      const Rfc822Token *value,
-				      int value_count, void *context);
+typedef void (*ParseContentParamFunc)(const char *name, size_t name_len,
+				      const char *value, size_t value_len,
+				      int value_quoted, void *context);

-int message_content_parse_header(const char *value, ParseContentFunc func,
-				 ParseContentParamFunc param_func,
-				 void *context);
+void message_content_parse_header(const char *data, size_t size,
+				  ParseContentFunc func,
+				  ParseContentParamFunc param_func,
+				  void *context);

 #endif
--- a/src/lib-mail/message-header-search.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-header-search.c	Fri Jan 03 17:57:12 2003 +0200
@@ -4,8 +4,8 @@
 #include "base64.h"
 #include "buffer.h"
 #include "charset-utf8.h"
-#include "rfc822-tokenize.h"
 #include "quoted-printable.h"
+#include "message-parser.h"
 #include "message-header-decode.h"
 #include "message-header-search.h"
--- a/src/lib-mail/message-parser.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-parser.c	Fri Jan 03 17:57:12 2003 +0200
@@ -2,7 +2,7 @@

 #include "lib.h"
 #include "istream.h"
-#include "rfc822-tokenize.h"
+#include "strescape.h"
 #include "message-content-parser.h"
 #include "message-parser.h"
 #include "message-size.h"
@@ -68,20 +68,17 @@
 	return part;
 }

-static void parse_content_type(const Rfc822Token *tokens, int count,
+static void parse_content_type(const char *value, size_t value_len,
 			       void *context)
 {
 	MessageParseContext *parse_ctx = context;
 	const char *str;

-	if (tokens[0].token != 'A')
+	if (parse_ctx->last_content_type != NULL || value_len == 0)
 		return;

-	if (parse_ctx->last_content_type != NULL)
-		return;
-
-	str = rfc822_tokens_get_value(tokens, count);
-	parse_ctx->last_content_type = p_strdup(parse_ctx->pool, str);
+	str = parse_ctx->last_content_type =
+		p_strndup(parse_ctx->pool, value, value_len);

 	if (strcasecmp(str, "message/rfc822") == 0)
 		parse_ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
@@ -97,20 +94,21 @@
 	}
 }

-static void parse_content_type_param(const Rfc822Token *name,
-				     const Rfc822Token *value,
-				     int value_count, void *context)
+static void parse_content_type_param(const char *name, size_t name_len,
+				     const char *value, size_t value_len,
+				     int value_quoted, void *context)
 {
 	MessageParseContext *parse_ctx = context;
-	const char *str;

 	if ((parse_ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
-	    name->len != 8 || strncasecmp(name->ptr, "boundary", 8) != 0)
+	    name_len != 8 || strncasecmp(name, "boundary", 8) != 0)
 		return;

 	if (parse_ctx->last_boundary == NULL) {
-		str = rfc822_tokens_get_value(value, value_count);
-		parse_ctx->last_boundary = p_strdup(parse_ctx->pool, str);
+		parse_ctx->last_boundary =
+			p_strndup(parse_ctx->pool, value, value_len);
+		if (value_quoted)
+			str_unescape(parse_ctx->last_boundary);
 	}
 }

@@ -129,10 +127,10 @@

 	if (name_len == 12 && strncasecmp(name, "Content-Type", 12) == 0) {
 		/* we need to know the boundary */
-		(void)message_content_parse_header(t_strndup(value, value_len),
-						   parse_content_type,
-						   parse_content_type_param,
-						   parse_ctx);
+		message_content_parse_header(value, value_len,
+					     parse_content_type,
+					     parse_content_type_param,
+					     parse_ctx);
 	}
 }
--- a/src/lib-mail/message-parser.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/message-parser.h	Fri Jan 03 17:57:12 2003 +0200
@@ -1,6 +1,9 @@
 #ifndef __MESSAGE_PARSER_H
 #define __MESSAGE_PARSER_H

+#define IS_LWSP(c) \
+	((c) == ' ' || (c) == '\t')
+
 typedef struct _MessagePart MessagePart;
 typedef struct _MessagePosition MessagePosition;
 typedef struct _MessageSize MessageSize;
--- a/src/lib-mail/rfc822-address.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/rfc822-address.c	Fri Jan 03 17:57:12 2003 +0200
@@ -17,56 +17,34 @@
 	return addr;
 }

-static int read_until(const Rfc822Token *tokens, const char *stop_tokens,
-		      String *comment)
-{
-	char *c_str;
-	int i, pos;
-
-	/* find the stop token */
-	for (i = 0; tokens[i].token != 0; i++) {
-		if (strchr(stop_tokens, tokens[i].token) != NULL)
-			break;
-
-		if (tokens[i].token == '(' && comment != NULL) {
-			/* save comment */
-			if (str_len(comment) > 0)
-				str_append_c(comment, ' ');
-			pos = str_len(comment);
-
-			str_append_n(comment, tokens[i].ptr, tokens[i].len);
-			c_str = str_c_modifyable(comment);
-
-			str_remove_escapes(c_str + pos);
-			str_truncate(comment, strlen(c_str));
-		}
-	}
-
-	return i;
-}
-
-static void read_until_get(const Rfc822Token **tokens, const char *stop_tokens,
-			   String *phrase, String *comment)
-{
-	const char *value;
-	int count;
-
-	count = read_until(*tokens, stop_tokens, comment);
-	if (count > 0) {
-		value = rfc822_tokens_get_value(*tokens, count);
-		str_append(phrase, value);
-
-		*tokens += count;
-	}
-}
-
 Rfc822Address *rfc822_address_parse(Pool pool, const char *str)
 {
+	static const Rfc822Token stop_tokens_init[] =
+		{ ',', '@', '<', ':', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_group[] =
+		{ ',', '@', '<', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_domain[] =
+		{ ',', '<', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_domain_group[] =
+		{ ',', '<', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_post_addr[] =
+		{ ',', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_post_addr_group[] =
+		{ ',', ';', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_route[] =
+		{ ':', '>', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_mailbox[] =
+		{ '@', '>', TOKEN_LAST };
+	static const Rfc822Token stop_tokens_addr_domain[] =
+		{ '>', TOKEN_LAST };
+
 	Rfc822Address *first_addr, **next_addr, *addr;
+	Rfc822TokenizeContext *ctx;
+	const Rfc822Token *stop_tokens;
+	Rfc822Token token;
 	String *mailbox, *domain, *route, *name, *comment, *next_phrase;
-	const Rfc822Token *tokens;
-	const char *list, *value;
-	int ingroup, stop, count;
+	size_t len;
+	int ingroup, stop;

 	if (str == NULL || *str == '\0')
 		return NULL;
@@ -81,36 +59,40 @@
 	   ENVELOPE wants groups to be stored like (NIL, NIL, group, NIL),
 	   ..., (NIL, NIL, NIL, NIL)
 	*/
-	tokens = rfc822_tokenize(str, NULL, NULL, NULL);
+	ctx = rfc822_tokenize_init(str, (size_t)-1, NULL, NULL);
+	rfc822_tokenize_skip_comments(ctx, FALSE);

 	t_push();
 	mailbox = t_str_new(128);
-	domain = t_str_new(128);
+	domain = t_str_new(256);
 	route = t_str_new(128);
-	name = t_str_new(128);
-	comment = t_str_new(128);
+	name = t_str_new(256);
+	comment = t_str_new(256);

-	ingroup = FALSE;
-	list = ",@<:";
+	ingroup = FALSE; len = 0;
+	stop_tokens = stop_tokens_init;

 	next_phrase = mailbox; stop = FALSE;
 	while (!stop) {
-		count = read_until(tokens, list, comment);
-		if (count > 0) {
-			if ((tokens[count].token == '<' ||
-			     next_phrase == name) && str_len(next_phrase) > 0) {
-				/* continuing previously started name,
-				   separate it from us with space */
-				str_append_c(next_phrase, ' ');
-			}
+		if (next_phrase == name && str_len(name) > 0) {
+			/* continuing previously started name,
+			   separate it from us with space */
+			str_append_c(name, ' ');
+			len = str_len(name);
+		} else {
+			len = 0;
+		}
+		(void)rfc822_tokenize_get_string(ctx, next_phrase, comment,
+						 stop_tokens);

-			value = rfc822_tokens_get_value(tokens, count);
-			str_append(next_phrase, value);
-			tokens += count;
+		if (next_phrase == name && len > 0 && len == str_len(name)) {
+			/* nothing appended, remove the space */
+			str_truncate(name, len-1);
 		}

-		switch (tokens->token) {
-		case 0:
+		token = rfc822_tokenize_get(ctx);
+		switch (token) {
+		case TOKEN_LAST:
 		case ',':
 		case ';':
 			/* end of address */
@@ -127,18 +109,19 @@
 					p_strdup(pool, str_c(comment));
 			}

-			if (ingroup && tokens->token == ';') {
+			if (ingroup && token == ';') {
 				/* end of group - add end of group marker */
 				ingroup = FALSE;
 				(void)new_address(pool, &next_addr);
 			}

-			if (tokens->token == 0) {
+			if (token == TOKEN_LAST) {
 				stop = TRUE;
 				break;
 			}

-			list = ingroup ? ",@<;" :  ",@<:";
+			stop_tokens = ingroup ? stop_tokens_group :
+				stop_tokens_init;

 			str_truncate(mailbox, 0);
 			str_truncate(domain, 0);
@@ -146,53 +129,58 @@
 			str_truncate(name, 0);
 			str_truncate(comment, 0);

-			tokens++;
 			next_phrase = mailbox;
 			break;
 		case '@':
 			/* domain part comes next */
-			tokens++;
 			next_phrase = domain;
-			list = ingroup ? ",<;" : ",<";
+			stop_tokens = ingroup ? stop_tokens_domain_group :
+				stop_tokens_domain;
 			break;
 		case '<':
 			/* route-addr */
-			tokens++;

 			/* mailbox/domain name so far has actually
 			   been the real name */
 			str_append_str(name, mailbox);
+			str_truncate(mailbox, 0);
+
 			if (str_len(domain) > 0) {
                                 str_append_c(name, '@');
 				str_append_str(name, domain);
+				str_truncate(domain, 0);
 			}

-			str_truncate(mailbox, 0);
-			str_truncate(domain, 0);
+			/* mailbox */
+			(void)rfc822_tokenize_get_string(ctx,
+				mailbox, NULL, stop_tokens_addr_mailbox);

-			read_until_get(&tokens, "@>", mailbox, NULL);
-			if (tokens->token == '@' && str_len(mailbox) == 0) {
+			if (rfc822_tokenize_get(ctx) == '@' &&
+			    str_len(mailbox) == 0) {
 				/* route is given */
-				tokens++;
-				read_until_get(&tokens, ":>", route, NULL);
-				if (tokens->token == ':') {
+				(void)rfc822_tokenize_get_string(ctx,
+					route, NULL, stop_tokens_addr_route);
+
+				if (rfc822_tokenize_get(ctx) == ':') {
 					/* mailbox comes next */
-					tokens++;
-					read_until_get(&tokens, "@>",
-						       mailbox, NULL);
+					(void)rfc822_tokenize_get_string(ctx,
+						mailbox, NULL,
+						stop_tokens_addr_mailbox);
 				}
 			}

-			if (tokens->token == '@') {
-				tokens++;
-				read_until_get(&tokens, ">", domain, NULL);
+			if (rfc822_tokenize_get(ctx) == '@') {
+				/* domain */
+				(void)rfc822_tokenize_get_string(ctx,
+					domain, NULL, stop_tokens_addr_domain);
 			}

-			if (tokens->token == '>')
-				tokens++;
+			token = rfc822_tokenize_get(ctx);
+			i_assert(token == '>' || token == TOKEN_LAST);

 			next_phrase = name;
-			list = ingroup ? ",;" : ",";
+			stop_tokens = ingroup ? stop_tokens_post_addr_group :
+				stop_tokens_post_addr;
 			break;
 		case ':':
 			/* beginning of group */
@@ -200,10 +188,13 @@
 			addr->name = p_strdup(pool, str_c(mailbox));

 			str_truncate(mailbox, 0);
-			tokens++;
+			str_truncate(comment, 0);

 			ingroup = TRUE;
-			list = ",@<;";
+			stop_tokens = stop_tokens_group;
+			break;
+		default:
+			i_unreached();
 			break;
 		}
 	}
@@ -212,6 +203,8 @@
 		(void)new_address(pool, &next_addr);

 	t_pop();
+	rfc822_tokenize_deinit(ctx);
+
 	return first_addr;
 }
--- a/src/lib-mail/rfc822-date.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/rfc822-date.c	Fri Jan 03 17:57:12 2003 +0200
@@ -86,64 +86,58 @@
 	return 0;
 }

-static const Rfc822Token *next_token(const Rfc822Token **tokens)
+static Rfc822Token next_token(Rfc822TokenizeContext *ctx,
+			      const char **value, size_t *value_len)
 {
-	const Rfc822Token *ret;
+	Rfc822Token token;

-	if ((*tokens)->token == 0)
-		return NULL;
+	(void)rfc822_tokenize_next(ctx);

-	ret = *tokens;
-	(*tokens)++;
-	return ret;
+	token = rfc822_tokenize_get(ctx);
+	if (token == 'A')
+		*value = rfc822_tokenize_get_value(ctx, value_len);
+	return token;
 }

-int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset)
+static int rfc822_parse_date_tokens(Rfc822TokenizeContext *ctx, time_t *time,
+				    int *timezone_offset)
 {
 	struct tm tm;
-	const Rfc822Token *tokens, *tok;
-	size_t i;
-
-	if (str == NULL || *str == '\0')
-		return FALSE;
+	Rfc822Token token;
+	const char *value;
+	size_t i, len;

-	/* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone
-
-	   we support comments here even while no-one ever uses them */
-
-	tokens = rfc822_tokenize(str, NULL, NULL, NULL);
-
+	/* [weekday_name "," ] dd month_name [yy]yy hh:mi[:ss] timezone */
 	memset(&tm, 0, sizeof(tm));

 	/* skip the optional weekday */
-	tok = next_token(&tokens);
-	if (tok != NULL && tok->token == 'A' && tok->len == 3) {
-		tok = next_token(&tokens);
-		if (tok == NULL || tok->token != ',')
+	token = next_token(ctx, &value, &len);
+	if (token == 'A' && len == 3) {
+		token = next_token(ctx, &value, &len);
+		if (token != ',')
 			return FALSE;

-		tok = next_token(&tokens);
+		token = next_token(ctx, &value, &len);
 	}

 	/* dd */
-	if (tok == NULL || tok->token != 'A' || tok->len > 2 ||
-	    !i_isdigit(tok->ptr[0]))
+	if (token != 'A' || len > 2 || !i_isdigit(value[0]))
 		return FALSE;

-	tm.tm_mday = tok->ptr[0]-'0';
-	if (tok->len == 2) {
-		if (!i_isdigit(tok->ptr[1]))
+	tm.tm_mday = value[0]-'0';
+	if (len == 2) {
+		if (!i_isdigit(value[1]))
 			return FALSE;
-		tm.tm_mday = (tm.tm_mday * 10) + (tok->ptr[1]-'0');
+		tm.tm_mday = (tm.tm_mday * 10) + (value[1]-'0');
 	}

 	/* month name */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 3)
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 3)
 		return FALSE;

 	for (i = 0; i < 12; i++) {
-		if (strncasecmp(month_names[i], tok->ptr, 3) == 0) {
+		if (strncasecmp(month_names[i], value, 3) == 0) {
 			tm.tm_mon = i;
 			break;
 		}
@@ -152,18 +146,17 @@
 		return FALSE;

 	/* [yy]yy */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' ||
-	    (tok->len != 2 && tok->len != 4))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || (len != 2 && len != 4))
 		return FALSE;

-	for (i = 0; i < tok->len; i++) {
-		if (!i_isdigit(tok->ptr[i]))
+	for (i = 0; i < len; i++) {
+		if (!i_isdigit(value[i]))
 			return FALSE;
-		tm.tm_year = tm.tm_year * 10 + (tok->ptr[i]-'0');
+		tm.tm_year = tm.tm_year * 10 + (value[i]-'0');
 	}

-	if (tok->len == 2) {
+	if (len == 2) {
 		/* two digit year, assume 1970+ */
 		if (tm.tm_year < 70)
 			tm.tm_year += 100;
@@ -174,36 +167,36 @@
 	}

 	/* hh */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-	    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 2 ||
+	    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 		return FALSE;
-	tm.tm_hour = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+	tm.tm_hour = (value[0]-'0') * 10 + (value[1]-'0');

 	/* :mm */
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != ':')
+	token = next_token(ctx, &value, &len);
+	if (token != ':')
 		return FALSE;
-	tok = next_token(&tokens);
-	if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-	    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token != 'A' || len != 2 ||
+	    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 		return FALSE;
-	tm.tm_min = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+	tm.tm_min = (value[0]-'0') * 10 + (value[1]-'0');

 	/* [:ss] */
-	tok = next_token(&tokens);
-	if (tok != NULL && tok->token == ':') {
-		tok = next_token(&tokens);
-		if (tok == NULL || tok->token != 'A' || tok->len != 2 ||
-		    !i_isdigit(tok->ptr[0]) || !i_isdigit(tok->ptr[1]))
+	token = next_token(ctx, &value, &len);
+	if (token == ':') {
+		token = next_token(ctx, &value, &len);
+		if (token != 'A' || len != 2 ||
+		    !i_isdigit(value[0]) || !i_isdigit(value[1]))
 			return FALSE;
-		tm.tm_sec = (tok->ptr[0]-'0') * 10 + (tok->ptr[1]-'0');
+		tm.tm_sec = (value[0]-'0') * 10 + (value[1]-'0');
 	}

 	/* timezone */
-	if (tok == NULL || tok->token != 'A')
+	if (token != 'A')
 		return FALSE;
-	*timezone_offset = parse_timezone(tok->ptr, tok->len);
+	*timezone_offset = parse_timezone(value, len);

 	tm.tm_isdst = -1;
 	*time = utc_mktime(&tm);
@@ -215,6 +208,21 @@
 	return TRUE;
 }

+int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset)
+{
+	Rfc822TokenizeContext *ctx;
+	int ret;
+	/* NULL and empty strings are rejected before any tokenizing */
+	if (data == NULL || *data == '\0')
+		return FALSE;
+	/* no error callback; size is (size_t)-1, tokenizer stops at the NUL */
+	ctx = rfc822_tokenize_init(data, (size_t)-1, NULL, NULL);
+	ret = rfc822_parse_date_tokens(ctx, time, timezone_offset);
+	rfc822_tokenize_deinit(ctx);
+
+	return ret;
+}
+
 const char *rfc822_to_date(time_t time)
 {
 	struct tm *tm;
--- a/src/lib-mail/rfc822-date.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/rfc822-date.h	Fri Jan 03 17:57:12 2003 +0200
@@ -3,7 +3,7 @@

 /* Parses RFC822 date/time string. timezone_offset is filled with the
    timezone's difference to UTC in minutes. */
-int rfc822_parse_date(const char *str, time_t *time, int *timezone_offset);
+int rfc822_parse_date(const char *data, time_t *time, int *timezone_offset);

 /* Create RFC822 date/time string from given time in local timezone. */
 const char *rfc822_to_date(time_t time);
--- a/src/lib-mail/rfc822-tokenize.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/rfc822-tokenize.c	Fri Jan 03 17:57:12 2003 +0200
@@ -1,58 +1,95 @@
 /* Copyright (C) 2002 Timo Sirainen */

 #include "lib.h"
+#include "str.h"
+#include "strescape.h"
 #include "rfc822-tokenize.h"

-#define INITIAL_COUNT 4
+struct _Rfc822TokenizeContext {
+	const char *data;
+	size_t size;
+
+	Rfc822TokenizeErrorFunc error_func;
+	void *error_context;
+
+	int token;
+	size_t token_pos, token_len;
+	size_t parse_pos;
+
+	unsigned int skip_comments:1;
+	unsigned int dot_token:1;
+
+	unsigned int in_bracket:1;
+};

 #define PARSE_ERROR() \
 	STMT_START { \
-	if (error_func != NULL && \
-	    !error_func(str, (size_t) (p-str), '\0', context)) \
-		return NULL; \
+	if (ctx->error_func != NULL && \
+	    !ctx->error_func(data, i, '\0', ctx->error_context)) \
+		return FALSE; \
 	} STMT_END

 #define PARSE_ERROR_MISSING(c) \
 	STMT_START { \
-	if (error_func != NULL && \
-	    !error_func(str, (size_t) (p-str), c, context)) \
-		return NULL; \
+	if (ctx->error_func != NULL && \
+	    !ctx->error_func(data, i, c, ctx->error_context)) \
+		return FALSE; \
 	} STMT_END

-static Rfc822Token *alloc_token(Rfc822Token **tokens, int *pos, int type)
-{
-	Rfc822Token *token;

-	/* @UNSAFE */
-	if (*pos+1 >= INITIAL_COUNT)
-		*tokens = t_buffer_reget_type(*tokens, Rfc822Token, *pos + 2);
+Rfc822TokenizeContext *
+rfc822_tokenize_init(const char *data, size_t size,
+		     Rfc822TokenizeErrorFunc error_func, void *error_context)
+{
+	Rfc822TokenizeContext *ctx;
+
+	ctx = i_new(Rfc822TokenizeContext, 1);
+	ctx->data = data;
+	ctx->size = size;

-	token = (*tokens) + *pos;
-	(*pos)++;
+	ctx->error_func = error_func;
+	ctx->error_context = error_context;
+
+	ctx->skip_comments = TRUE;
+	ctx->dot_token = TRUE;

-	token->token = type;
-	token->ptr = NULL;
-	token->len = 0;
-	return token;
+	ctx->token = -1;
+	return ctx;
+}
+
+void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx)
+{
+	i_free(ctx);
 }

-const Rfc822Token *rfc822_tokenize(const char *str, int *tokens_count,
-				   Rfc822TokenizeErrorFunc error_func,
-				   void *context)
+void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set)
 {
-	Rfc822Token *first_token, *token;
-	const char *p, *last_atom;
-	int level, in_bracket, pos;
+	ctx->skip_comments = set;
+}
+
+void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set)
+{
+	ctx->dot_token = set;
+}

-	first_token = t_buffer_get_type(Rfc822Token, INITIAL_COUNT);
-	pos = 0;
+int rfc822_tokenize_next(Rfc822TokenizeContext *ctx)
+{
+	int token, level, last_atom;
+	const char *data;
+	size_t i, size;
+
+	if (ctx->token == TOKEN_LAST)
+		return FALSE;

-	token = NULL;
-	last_atom = NULL;
+	data = ctx->data;
+	size = ctx->size;
+
+	ctx->token = TOKEN_LAST;

-	in_bracket = FALSE;
-	for (p = str; *p != '\0'; p++) {
-		switch (*p) {
+	last_atom = FALSE;
+	for (i = ctx->parse_pos; i < size && data[i] != '\0'; i++) {
+		token = -1;
+		switch (data[i]) {
 		case ' ':
 		case '\t':
 		case '\r':
@@ -60,6 +97,112 @@
 			/* skip whitespace */
 			break;

+		case '(':
+			/* (comment) - nesting is allowed */
+			if (last_atom)
+				break;
+
+			token = '(';
+			ctx->token_pos = ++i;
+
+			level = 1;
+			for (; i < size && data[i] != '\0'; i++) {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				else if (data[i] == '(')
+					level++;
+				else if (data[i] == ')') {
+					if (--level == 0)
+						break;
+				}
+			}
+
+			if (level > 0)
+				PARSE_ERROR_MISSING(')');
+
+			ctx->token_len = (size_t) (i - ctx->token_pos);
+			break;
+
+		case '[':
+			/* domain literal - nesting isn't allowed */
+			if (last_atom)
+				break;
+
+			token = '[';
+			ctx->token_pos = ++i;
+
+			while (i < size && data[i] != '\0' && data[i] != ']') {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				else if (data[i] == '[') {
+					/* nesting not allowed, but
+					   continue anyway */
+					PARSE_ERROR();
+				}
+
+				i++;
+			}
+
+			if (i == size || data[i] == '\0')
+				PARSE_ERROR_MISSING(']');
+
+			ctx->token_len = (size_t) (i - ctx->token_pos);
+			break;
+
+		case '"':
+			/* quoted string */
+			if (last_atom)
+				break;
+
+			token = '"';
+			ctx->token_pos = ++i;
+
+			while (i < size && data[i] != '\0' && data[i] != '"') {
+				if (data[i] == '\\' &&
+				    i+1 < size && data[i+1] != '\0')
+					i++;
+				i++;
+			}
+
+			if (i == size || data[i] == '\0')
+				PARSE_ERROR_MISSING('"');
+
+			ctx->token_len = (size_t) (i - ctx->token_pos);
+			break;
+
+		case '<':
+			if (last_atom)
+				break;
+
+			if (ctx->in_bracket) {
+				/* '<' cannot be nested */
+				PARSE_ERROR();
+			}
+
+			token = '<';
+			ctx->in_bracket = TRUE;
+			break;
+		case '>':
+			if (last_atom)
+				break;
+
+			if (!ctx->in_bracket) {
+				/* missing '<' */
+                                PARSE_ERROR();
+			}
+
+			token = '>';
+			ctx->in_bracket = FALSE;
+			break;
+
+		case ')':
+		case ']':
+		case '\\':
+			PARSE_ERROR();
+			/* fall through */
+
 		/* RFC822 specials: */
 		case '@':
 		case ',':
@@ -70,240 +213,134 @@
 		case '/':
 		case '?':
 		case '=':
-			token = alloc_token(&first_token, &pos, *p);
-			break;
-
-		case '(':
-			/* (comment) - nesting is allowed */
-			token = alloc_token(&first_token, &pos, '(');
-			token->ptr = ++p;
-
-			level = 1;
-			for (; *p != '\0'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
-				else if (*p == '(')
-					level++;
-				else if (*p == ')') {
-					if (--level == 0)
-						break;
-				}
-			}
-
-			if (level > 0)
-				PARSE_ERROR_MISSING(')');
-
-			token->len = (size_t) (p - token->ptr);
-			break;
-
-		case '[':
-			/* domain literal - nesting isn't allowed */
-			token = alloc_token(&first_token, &pos, '[');
-			token->ptr = ++p;
-
-			for (; *p != '\0' && *p != ']'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
-				else if (*p == '[') {
-					/* nesting not allowed, but
-					   continue anyway */
-					PARSE_ERROR();
-				}
-			}
-			token->len = (size_t) (p - token->ptr);
-
-			if (*p == '\0')
-				PARSE_ERROR_MISSING(']');
-			break;
-
-		case '"':
-			/* quoted string */
-			token = alloc_token(&first_token, &pos, '"');
-			token->ptr = ++p;
-
-			for (; *p != '\0' && *p != '"'; p++) {
-				if (*p == '\\' && p[1] != '\0')
-					p++;
-			}
-			token->len = (size_t) (p - token->ptr);
-
-			if (*p == '\0')
-				PARSE_ERROR_MISSING('"');
-			break;
-
-		case '<':
-			if (in_bracket) {
-				/* '<' cannot be nested */
-				PARSE_ERROR();
+			token = ctx->data[i];
+			if (token != '.' || ctx->dot_token)
 				break;
-			}
-
-			token = alloc_token(&first_token, &pos, '<');
-			in_bracket = TRUE;
-			break;
-		case '>':
-			if (!in_bracket) {
-				/* missing '<' */
-                                PARSE_ERROR();
-				break;
-			}
-
-			token = alloc_token(&first_token, &pos, '>');
-			in_bracket = FALSE;
-			break;
-
-		case ')':
-		case ']':
-		case '\\':
-                        PARSE_ERROR();
-			break;
+			/* fall through */
 		default:
 			/* atom */
-			if (last_atom != p-1) {
-				token = alloc_token(&first_token, &pos, 'A');
-				token->ptr = p;
+			token = 'A';
+			if (!last_atom) {
+				ctx->token = token;
+				ctx->token_pos = i;
+				last_atom = TRUE;
 			}
-
-			token->len++;
-			last_atom = p;
 			break;
 		}

-		if (*p == '\0')
-			break;
-	}
-
-	if (in_bracket && error_func != NULL) {
-		if (!error_func(str, (size_t) (p-str), '>', context))
-			return NULL;
-	}
-
-	if (tokens_count != NULL)
-		*tokens_count = pos;
-
-	/* @UNSAFE */
-	first_token[pos++].token = 0;
-	t_buffer_alloc(sizeof(Rfc822Token) * pos);
-	return first_token;
-}
-
-const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count)
-{
-	/* @UNSAFE */
-	char *buf;
-	size_t i, len, buf_size;
-	int last_atom;
-
-	if (count <= 0)
-		return "";
-
-	buf_size = 256;
-	buf = t_buffer_get(buf_size);
-
-	len = 0; last_atom = FALSE;
-	for (; count > 0; count--, tokens++) {
-		if (tokens->token == '(')
-			continue; /* skip comments */
-
-		/* +4 == ' ' '[' ']' '\0' */
-		if (len + tokens->len+4 >= buf_size) {
-			buf_size = nearest_power(buf_size + tokens->len + 3);
-			buf = t_buffer_reget(buf, buf_size);
+		if (last_atom) {
+			if (token != 'A') {
+				/* end of atom */
+				ctx->token_len = (size_t) (i - ctx->token_pos);
+				last_atom = FALSE;
+				break;
+			}
+		} else {
+			if (token != -1) {
+				ctx->token = token;
+				if (i < ctx->size && data[i] != '\0')
+					i++;
+				break;
+			}
 		}

-		switch (tokens->token) {
-		case '"':
-		case '[':
-			if (tokens->token == '[')
-				buf[len++] = '[';
-
-			/* copy the string removing '\' chars */
-			for (i = 0; i < tokens->len; i++) {
-				if (tokens->ptr[i] == '\\' && i+1 < tokens->len)
-					i++;
-
-				buf[len++] = tokens->ptr[i];
-			}
-
-			if (tokens->token == '[')
-				buf[len++] = ']';
-			break;
-		case 'A':
-			if (last_atom)
-				buf[len++] = ' ';
-
-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
-			break;
-		default:
-			i_assert(tokens->token != 0);
-			buf[len++] = (char) tokens->token;
+		if (i == ctx->size || data[i] == '\0') {
+			/* unexpected eol */
 			break;
 		}
+	}

-		last_atom = tokens->token == 'A';
+	if (last_atom) {
+		/* end of atom */
+		ctx->token_len = (size_t) (i - ctx->token_pos);
+	}
+
+	ctx->parse_pos = i;
+
+	if (ctx->token == TOKEN_LAST && ctx->in_bracket &&
+	    ctx->error_func != NULL) {
+		if (!ctx->error_func(data, i, '>', ctx->error_context))
+			return FALSE;
 	}

-	buf[len++] = '\0';
-        t_buffer_alloc(len);
-	return buf;
+	return TRUE;
+}
+
+Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx)
+{
+	return ctx->token;
+}
+
+const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx,
+				      size_t *len)
+{
+	i_assert(IS_TOKEN_STRING(ctx->token));
+
+	*len = ctx->token_len;
+	return ctx->data + ctx->token_pos;
 }

-const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens,
-					   int count)
+int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx,
+			       String *str, String *comments,
+			       const Rfc822Token *stop_tokens)
 {
-	/* @UNSAFE */
-	char *buf;
-	size_t len, buf_size;
-	int last_atom;
+	Rfc822Token token;
+	const char *value;
+	size_t len;
+	int i, token_str, last_str;

-	if (count <= 0)
-		return "\"\"";
+	last_str = FALSE;
+	while (rfc822_tokenize_next(ctx)) {
+		token = rfc822_tokenize_get(ctx);
+		if (token == TOKEN_LAST)
+			return TRUE;

-	buf_size = 256;
-	buf = t_buffer_get(buf_size);
-	buf[0] = '"'; len = 1; last_atom = FALSE;
+		for (i = 0; stop_tokens[i] != TOKEN_LAST; i++)
+			if (token == stop_tokens[i])
+				return TRUE;

-	for (; count > 0; count--, tokens++) {
-		if (tokens->token == '(')
-			continue; /* skip comments */
+		if (token == TOKEN_COMMENT) {
+			/* handle comment specially */
+			if (comments != NULL) {
+				if (str_len(comments) > 0)
+					str_append_c(comments, ' ');

-		/* +5 == ' ' '[' ']' '"' '\0' */
-		if (len + tokens->len+5 >= buf_size) {
-			buf_size = nearest_power(buf_size + tokens->len + 3);
-			buf = t_buffer_reget(buf, buf_size);
+				value = rfc822_tokenize_get_value(ctx, &len);
+				str_append_unescaped(comments, value, len);
+			}
+			continue;
 		}

-		switch (tokens->token) {
-		case '"':
-		case '[':
-			if (tokens->token == '[')
-				buf[len++] = '[';
+		token_str = token == TOKEN_ATOM || token == TOKEN_QSTRING ||
+			token == TOKEN_DLITERAL || token == TOKEN_COMMENT;

-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
+		if (!token_str)
+			str_append_c(str, token);
+		else if (token == TOKEN_QSTRING) {
+			/* unescape only quoted strings, since we're removing
+			   the quotes. for domain literals I don't see much
+			   point in unescaping if [] is still kept.. */
+			if (last_str)
+				str_append_c(str, ' ');

-			if (tokens->token == '[')
-				buf[len++] = ']';
-			break;
-		case 'A':
-			if (last_atom)
-				buf[len++] = ' ';
+			value = rfc822_tokenize_get_value(ctx, &len);
+			str_append_unescaped(str, value, len);
+		} else {
+			if (last_str)
+				str_append_c(str, ' ');

-			memcpy(buf+len, tokens->ptr, tokens->len);
-			len += tokens->len;
-			break;
-		default:
-			i_assert(tokens->token != 0);
-			buf[len++] = (char) tokens->token;
-			break;
+			if (token == TOKEN_DLITERAL)
+				str_append_c(str, '[');
+
+			value = rfc822_tokenize_get_value(ctx, &len);
+			str_append_n(str, value, len);
+
+			if (token == TOKEN_DLITERAL)
+				str_append_c(str, ']');
 		}

-		last_atom = tokens->token == 'A';
+		last_str = token_str;
 	}

-	buf[len++] = '"';
-	buf[len++] = '\0';
-        t_buffer_alloc(len);
-	return buf;
+	return FALSE;
 }
--- a/src/lib-mail/rfc822-tokenize.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-mail/rfc822-tokenize.h	Fri Jan 03 17:57:12 2003 +0200
@@ -1,38 +1,29 @@
 #ifndef __RFC822_TOKENIZE_H
 #define __RFC822_TOKENIZE_H

-typedef struct _Rfc822Token Rfc822Token;
-
 #define IS_TOKEN_STRING(token) \
-	((token) == 'A' || (token) == '"' || (token) == '(' || (token) == '[')
-
-#define IS_LWSP(c) \
-	((c) == ' ' || (c) == '\t')
+	((token) == TOKEN_ATOM || (token) == TOKEN_QSTRING || \
+	 (token) == TOKEN_COMMENT || (token) == TOKEN_DLITERAL)

-struct _Rfc822Token {
-	/*
-	   0   = last token
-	   'A' = atom
-	   '"' = quoted string
-	   '(' = comment
-	   '[' = domain literal
+typedef enum {
+	TOKEN_ATOM	= 'A',
+	TOKEN_QSTRING	= '"',
+	TOKEN_COMMENT	= '(',
+	TOKEN_DLITERAL	= '[',

-	   RFC822 specials:
+	/* RFC822 specials:

-	   '<', '>', '@', ',', ';', ':', '\', '.'
+	   '<', '>', '@', ',', ';', ':', '\'
+	   '.' (optional)

 	   RFC2045 tspecials:

-	   '/', '?', '='
-	*/
-	int token;
+	   '/', '?', '=' */

-        /* - not including enclosing "", () or []
-	   - '\' isn't expanded
-	   - [CR+]LF+LWSP (continued header) isn't removed */
-	const char *ptr;
-	size_t len;
-};
+	TOKEN_LAST	= 0
+} Rfc822Token;
+
+typedef struct _Rfc822TokenizeContext Rfc822TokenizeContext;

 /* Parsing is aborted if returns FALSE. There's two kinds of errors:

@@ -44,15 +35,36 @@
 /* Tokenize the string. Returns NULL if string is empty. Memory for
    returned array is allocated from data stack. You don't have to use
    the tokens_count, since last token is always 0. */
-const Rfc822Token *rfc822_tokenize(const char *str, int *tokens_count,
-				   Rfc822TokenizeErrorFunc error_func,
-				   void *context);
+Rfc822TokenizeContext *
+rfc822_tokenize_init(const char *data, size_t size,
+		     Rfc822TokenizeErrorFunc error_func, void *error_context);
+void rfc822_tokenize_deinit(Rfc822TokenizeContext *ctx);
+
+/* Specify whether comments should be silently skipped (default yes). */
+void rfc822_tokenize_skip_comments(Rfc822TokenizeContext *ctx, int set);
+/* Specify whether '.' should be treated as a separate token (default yes). */
+void rfc822_tokenize_dot_token(Rfc822TokenizeContext *ctx, int set);
+
+/* Parse the next token. Returns FALSE if parsing error occurred and error
+   function wanted to abort. It's not required to check the return value,
+   rfc822_tokenize_get() will return TOKEN_LAST after errors. Returns FALSE
+   also when last token was already read. */
+int rfc822_tokenize_next(Rfc822TokenizeContext *ctx);

-/* Returns the tokens as a string. Tokens are merged together, except
-   spaces are added between atoms. */
-const char *rfc822_tokens_get_value(const Rfc822Token *tokens, int count);
-/* Returns the tokens as a "string". */
-const char *rfc822_tokens_get_value_quoted(const Rfc822Token *tokens,
-					   int count);
+/* Return the token parsed by the latest rfc822_tokenize_next() call. */
+Rfc822Token rfc822_tokenize_get(const Rfc822TokenizeContext *ctx);
+
+/* - not including enclosing "", () or []
+   - '\' isn't expanded
+   - [CR+]LF+LWSP (continued header) isn't removed */
+const char *rfc822_tokenize_get_value(const Rfc822TokenizeContext *ctx,
+				      size_t *len);
+
+/* Return tokens as a string, all quoted strings will be unquoted.
+   Reads until a token listed in stop_tokens (TOKEN_LAST-terminated) or the
+   last token is found. Returns FALSE if rfc822_tokenize_next() failed. */
+int rfc822_tokenize_get_string(Rfc822TokenizeContext *ctx,
+			       String *str, String *comments,
+			       const Rfc822Token *stop_tokens);

 #endif
--- a/src/lib-storage/index/index-fetch-section.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-storage/index/index-fetch-section.c	Fri Jan 03 17:57:12 2003 +0200
@@ -4,7 +4,6 @@
 #include "str.h"
 #include "istream.h"
 #include "ostream.h"
-#include "rfc822-tokenize.h"
 #include "message-send.h"
 #include "index-storage.h"
 #include "index-fetch.h"
--- a/src/lib-storage/index/index-search.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib-storage/index/index-search.c	Fri Jan 03 17:57:12 2003 +0200
@@ -4,7 +4,6 @@
 #include "istream.h"
 #include "ostream.h"
 #include "mmap-util.h"
-#include "rfc822-tokenize.h"
 #include "rfc822-date.h"
 #include "message-size.h"
 #include "message-body-search.h"
--- a/src/lib/Makefile.am	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib/Makefile.am	Fri Jan 03 17:57:12 2003 +0200
@@ -44,8 +44,9 @@
 	safe-memset.c \
 	safe-mkdir.c \
 	sendfile-util.c \
+	str.c \
+	strescape.c \
 	strfuncs.c \
-	str.c \
 	unlink-directory.c \
 	unlink-lockfiles.c \
 	utc-offset.c \
@@ -91,8 +92,9 @@
 	safe-memset.h \
 	safe-mkdir.h \
 	sendfile-util.h \
+	str.h \
+	strescape.h \
 	strfuncs.h \
-	str.h \
 	unlink-directory.h \
 	unlink-lockfiles.h \
 	utc-offset.h \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/strescape.c	Fri Jan 03 17:57:12 2003 +0200
@@ -0,0 +1,90 @@
+/*
+    Copyright (c) 2003 Timo Sirainen
+
+    Permission is hereby granted, free of charge, to any person obtaining
+    a copy of this software and associated documentation files (the
+    "Software"), to deal in the Software without restriction, including
+    without limitation the rights to use, copy, modify, merge, publish,
+    distribute, sublicense, and/or sell copies of the Software, and to
+    permit persons to whom the Software is furnished to do so, subject to
+    the following conditions:
+
+    The above copyright notice and this permission notice shall be
+    included in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include "lib.h"
+#include "str.h"
+#include "strescape.h"
+
+const char *str_escape(const char *str)
+{
+	char *ret, *p;
+	size_t i, esc;
+
+	/* pass 1: i = length of string, esc = number of chars to escape */
+	esc = 0;
+	for (i = 0; str[i] != '\0'; i++) {
+		if (IS_ESCAPED_CHAR(str[i]))
+			esc++;
+	}
+	/* nothing to escape: return the caller's own pointer, no copy */
+	if (esc == 0)
+		return str;
+
+	/* @UNSAFE: pass 2, buffer holds length + one '\' per escape + NUL */
+	p = ret = t_malloc(i + esc + 1);
+	for (; *str != '\0'; str++) {
+		if (IS_ESCAPED_CHAR(*str))
+			*p++ = '\\';
+		*p++ = *str;
+	}
+	*p = '\0';
+	return ret;
+}
+
+/* Append src[0..src_size) to dest with all '\' characters removed (both
+   backslashes of "\\" are dropped). src[src_size] is never read. */
+void str_append_unescaped(String *dest, const char *src, size_t src_size)
+{
+	size_t start, i = 0;
+
+	while (i < src_size) {
+		/* copy the run up to the next backslash or end of input */
+		for (start = i; i < src_size; i++) {
+			if (src[i] == '\\')
+				break;
+		}
+		str_append_n(dest, src + start, i-start);
+		/* skip the backslash; i == src_size means there was none */
+		if (i < src_size)
+			i++;
+	}
+}
+
+void str_unescape(char *str)
+{
+	/* @UNSAFE: rewrites str in place */
+	char *dest;
+	/* skip the prefix that has no backslash; return if there is none */
+	while (*str != '\\') {
+		if (*str == '\0')
+			return;
+		str++;
+	}
+	/* compact the rest over itself; a '\' at end of string is kept */
+	for (dest = str; *str != '\0'; str++) {
+		if (*str != '\\' || str[1] == '\0')
+			*dest++ = *str;
+	}
+
+	*dest = '\0';
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/strescape.h	Fri Jan 03 17:57:12 2003 +0200
@@ -0,0 +1,15 @@
+#ifndef __STRESCAPE_H
+#define __STRESCAPE_H
+
+#define IS_ESCAPED_CHAR(c) ((c) == '"' || (c) == '\\')
+
+/* escape all '\' and '"' characters */
+const char *str_escape(const char *str);
+
+/* remove all '\' characters, append to given string */
+void str_append_unescaped(String *dest, const char *src, size_t src_size);
+
+/* remove all '\' characters in place; a '\' at end of string is kept */
+void str_unescape(char *str);
+
+#endif
--- a/src/lib/strfuncs.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib/strfuncs.c	Fri Jan 03 17:57:12 2003 +0200
@@ -462,18 +462,6 @@
         return str;
 }

-void str_remove_escapes(char *str)
-{
-	char *dest;
-
-	for (dest = str; *str != '\0'; str++) {
-		if (*str != '\\' || str[1] == '\0')
-			*dest++ = *str;
-	}
-
-	*dest = '\0';
-}
-
 const char **t_strsplit(const char *data, const char *separators)
 {
         const char **array;
--- a/src/lib/strfuncs.h	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/lib/strfuncs.h	Fri Jan 03 17:57:12 2003 +0200
@@ -49,7 +49,6 @@

 char *str_ucase(char *str);
 char *str_lcase(char *str);
-void str_remove_escapes(char *str);

 /* seprators is an array of separator characters, not a separator string. */
 const char **t_strsplit(const char *data, const char *separators);
--- a/src/login/client.c	Fri Jan 03 07:36:20 2003 +0200
+++ b/src/login/client.c	Fri Jan 03 17:57:12 2003 +0200
@@ -8,6 +8,7 @@
 #include "ostream.h"
 #include "process-title.h"
 #include "safe-memset.h"
+#include "strescape.h"
 #include "client.h"
 #include "client-authenticate.h"
 #include "ssl-proxy.h"
@@ -157,7 +158,7 @@

 		if (*line == '"')
 			*line++ = '\0';
-		str_remove_escapes(start);
+		str_unescape(start);
 	} else {
 		start = line;
 		while (*line != '\0' && *line != ' ')