changeset 5522:5dee807e53cf HEAD

The header parser now takes a flags parameter that tells it how to handle linefeeds. The message parser's boolean parameter was changed to a flags enum as well.
author Timo Sirainen <tss@iki.fi>
date Wed, 04 Apr 2007 12:27:30 +0300
parents baf3367d2450
children 2d601735ef93
files src/lib-mail/istream-header-filter.c src/lib-mail/message-body-search.c src/lib-mail/message-body-search.h src/lib-mail/message-decoder.c src/lib-mail/message-header-parser.c src/lib-mail/message-header-parser.h src/lib-mail/message-parser.c src/lib-mail/message-parser.h src/lib-storage/index/index-mail-headers.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-search.c src/lib-storage/index/mbox/mbox-sync-parse.c src/plugins/fts/fts-storage.c
diffstat 13 files changed, 105 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/istream-header-filter.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/istream-header-filter.c	Wed Apr 04 12:27:30 2007 +0300
@@ -79,7 +79,7 @@
 
 	if (mstream->hdr_ctx == NULL) {
 		mstream->hdr_ctx =
-			message_parse_header_init(mstream->input, NULL, FALSE);
+			message_parse_header_init(mstream->input, NULL, 0);
 	}
 
 	buffer_copy(mstream->hdr_buf, 0,
--- a/src/lib-mail/message-body-search.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-body-search.c	Wed Apr 04 12:27:30 2007 +0300
@@ -161,6 +161,8 @@
 			struct istream *input,
 			const struct message_part *parts)
 {
+	const enum message_header_parser_flags hdr_parser_flags =
+		MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE;
 	struct message_parser_ctx *parser_ctx;
 	struct message_block raw_block, block;
 	int ret = 0;
@@ -171,7 +173,7 @@
 
 	parser_ctx =
 		message_parser_init_from_parts((struct message_part *)parts,
-					       input, TRUE);
+					       input, hdr_parser_flags, 0);
 
 	while ((ret = message_parser_parse_next_block(parser_ctx,
 						      &raw_block)) > 0) {
--- a/src/lib-mail/message-body-search.h	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-body-search.h	Wed Apr 04 12:27:30 2007 +0300
@@ -5,8 +5,7 @@
 struct message_body_search_context;
 
 /* Returns 1 if ok, 0 if unknown charset, -1 if key contains invalid characters
-   in given charset. If charset is NULL, the key isn't assumed to be in any
-   specific charset but is compared to message data without any translation. */
+   in given charset. */
 int message_body_search_init(pool_t pool, const char *key, const char *charset,
 			     bool search_header,
 			     struct message_body_search_context **ctx_r);
--- a/src/lib-mail/message-decoder.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-decoder.c	Wed Apr 04 12:27:30 2007 +0300
@@ -78,7 +78,7 @@
 	bool unknown_charset;
 
 	if (charset == NULL || charset_is_utf8(charset)) {
-		/* ASCII */
+		/* ASCII / UTF-8 */
 		_charset_utf8_ucase(data, size, ctx->buf, ctx->buf->used);
 		return TRUE;
 	}
--- a/src/lib-mail/message-header-parser.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-header-parser.c	Wed Apr 04 12:27:30 2007 +0300
@@ -17,14 +17,14 @@
 	buffer_t *value_buf;
 	size_t skip;
 
-	unsigned int skip_initial_lwsp:1;
+	enum message_header_parser_flags flags;
 	unsigned int skip_line:1;
 	unsigned int has_nuls:1;
 };
 
 struct message_header_parser_ctx *
 message_parse_header_init(struct istream *input, struct message_size *hdr_size,
-			  bool skip_initial_lwsp)
+			  enum message_header_parser_flags flags)
 {
 	struct message_header_parser_ctx *ctx;
 
@@ -32,7 +32,7 @@
 	ctx->input = input;
 	ctx->hdr_size = hdr_size;
 	ctx->name = str_new(default_pool, 128);
-	ctx->skip_initial_lwsp = skip_initial_lwsp;
+	ctx->flags = flags;
 
 	if (hdr_size != NULL)
 		memset(hdr_size, 0, sizeof(*hdr_size));
@@ -72,8 +72,10 @@
 
 	startpos = 0; colon_pos = UINT_MAX;
 
-	last_crlf = line->crlf_newline;
-	last_no_newline = line->no_newline;
+	last_crlf = line->crlf_newline &&
+		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_DROP_CR) == 0;
+	last_no_newline = line->no_newline ||
+		(ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) != 0;
 	line->no_newline = FALSE;
 	line->crlf_newline = FALSE;
 
@@ -284,7 +286,7 @@
 
 		line->value = msg + colon_pos+1;
 		line->value_len = size - colon_pos - 1;
-		if (ctx->skip_initial_lwsp) {
+		if (ctx->flags & MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP) {
 			/* get value. skip all LWSP after ':'. Note that
 			   RFC2822 doesn't say we should, but history behind
 			   it..
@@ -341,7 +343,17 @@
 				buffer_append_c(ctx->value_buf, '\r');
 			buffer_append_c(ctx->value_buf, '\n');
 		}
-		buffer_append(ctx->value_buf, line->value, line->value_len);
+		if ((ctx->flags & MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE) &&
+		    line->value_len > 0 && line->value[0] != ' ') {
+			i_assert(IS_LWSP(line->value[0]));
+
+			buffer_append_c(ctx->value_buf, ' ');
+			buffer_append(ctx->value_buf,
+				      line->value + 1, line->value_len - 1);
+		} else {
+			buffer_append(ctx->value_buf,
+				      line->value, line->value_len);
+		}
 		line->full_value = buffer_get_data(ctx->value_buf,
 						   &line->full_value_len);
 	} else {
@@ -369,13 +381,14 @@
 
 #undef message_parse_header
 void message_parse_header(struct istream *input, struct message_size *hdr_size,
+			  enum message_header_parser_flags flags,
 			  message_header_callback_t *callback, void *context)
 {
 	struct message_header_parser_ctx *hdr_ctx;
 	struct message_header_line *hdr;
 	int ret;
 
-	hdr_ctx = message_parse_header_init(input, hdr_size, TRUE);
+	hdr_ctx = message_parse_header_init(input, hdr_size, flags);
 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0)
 		callback(hdr, context);
 	i_assert(ret != 0);
--- a/src/lib-mail/message-header-parser.h	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-header-parser.h	Wed Apr 04 12:27:30 2007 +0300
@@ -7,6 +7,15 @@
 struct message_size;
 struct message_header_parser_ctx;
 
+enum message_header_parser_flags {
+	/* Skip all LWSP after "header:" instead of including it in value. */
+	MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP	= 0x01,
+	/* Don't add CRs to full_value even if input had CRLF newlines */
+	MESSAGE_HEADER_PARSER_FLAG_DROP_CR		= 0x02,
+	/* Convert [CR+]LF+LWSP to a space character in full_value */
+	MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE	= 0x04
+};
+
 struct message_header_line {
 	const char *name;
 	size_t name_len;
@@ -34,13 +43,9 @@
 typedef void message_header_callback_t(struct message_header_line *hdr,
 				       void *context);
 
-/* skip_initial_lwsp controls if we should skip LWSP after "header: ".
-   Note that there may not be the single whitespace after "header:", and that
-   "header : " is also possible. These two conditions can't be determined from
-   struct message_header_line. */
 struct message_header_parser_ctx *
 message_parse_header_init(struct istream *input, struct message_size *hdr_size,
-			  bool skip_initial_lwsp);
+			  enum message_header_parser_flags flags);
 void message_parse_header_deinit(struct message_header_parser_ctx **ctx);
 
 /* Read and return next header line. Returns 1 if header is returned, 0 if
@@ -54,15 +59,16 @@
 
 /* Read and parse the header from the given stream. */
 void message_parse_header(struct istream *input, struct message_size *hdr_size,
+			  enum message_header_parser_flags flags,
 			  message_header_callback_t *callback, void *context);
 #ifdef CONTEXT_TYPE_SAFETY
-#  define message_parse_header(input, hdr_size, callback, context) \
+#  define message_parse_header(input, hdr_size, flags, callback, context) \
 	({(void)(1 ? 0 : callback((struct message_header_line *)0, context)); \
-	  message_parse_header(input, hdr_size, \
+	  message_parse_header(input, hdr_size, flags, \
 		(message_header_callback_t *)callback, context); })
 #else
-#  define message_parse_header(input, hdr_size, callback, context) \
-	  message_parse_header(input, hdr_size, \
+#  define message_parse_header(input, hdr_size, flags, callback, context) \
+	  message_parse_header(input, hdr_size, flags, \
 		(message_header_callback_t *)callback, context)
 #endif
 
--- a/src/lib-mail/message-parser.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-parser.c	Wed Apr 04 12:27:30 2007 +0300
@@ -25,6 +25,9 @@
 	struct istream *input;
 	struct message_part *parts, *part;
 
+	enum message_header_parser_flags hdr_flags;
+	enum message_parser_flags flags;
+
 	const char *last_boundary;
 	struct message_boundary *boundaries;
 
@@ -37,7 +40,6 @@
 	int (*parse_next_block)(struct message_parser_ctx *ctx,
 				struct message_block *block_r);
 
-	unsigned int return_body_blocks:1;
 	unsigned int part_seen_content_type:1;
 };
 
@@ -534,7 +536,7 @@
 
 	ctx->hdr_parser_ctx =
 		message_parse_header_init(ctx->input, &ctx->part->header_size,
-					  TRUE);
+					  ctx->hdr_flags);
 	ctx->part_seen_content_type = FALSE;
 
 	ctx->parse_next_block = parse_next_header;
@@ -610,7 +612,7 @@
 	if (ctx->part->children != NULL) {
 		ctx->parse_next_block = preparsed_parse_next_header_init;
 		ctx->part = ctx->part->children;
-	} else if (ctx->return_body_blocks) {
+	} else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) {
 		ctx->parse_next_block = preparsed_parse_body_init;
 	} else {
 		preparsed_skip_to_next(ctx);
@@ -652,14 +654,17 @@
 	i_stream_skip(ctx->input, ctx->part->physical_pos -
 		      ctx->input->v_offset);
 
-	ctx->hdr_parser_ctx = message_parse_header_init(ctx->input, NULL, TRUE);
+	ctx->hdr_parser_ctx =
+		message_parse_header_init(ctx->input, NULL, ctx->hdr_flags);
 
 	ctx->parse_next_block = preparsed_parse_next_header;
 	return preparsed_parse_next_header(ctx, block_r);
 }
 
 struct message_parser_ctx *
-message_parser_init(pool_t part_pool, struct istream *input)
+message_parser_init(pool_t part_pool, struct istream *input,
+		    enum message_header_parser_flags hdr_flags,
+		    enum message_parser_flags flags)
 {
 	struct message_parser_ctx *ctx;
 	pool_t pool;
@@ -668,6 +673,8 @@
 	ctx = p_new(pool, struct message_parser_ctx, 1);
 	ctx->parser_pool = pool;
 	ctx->part_pool = part_pool;
+	ctx->hdr_flags = hdr_flags;
+	ctx->flags = flags;
 	ctx->input = input;
 	ctx->parts = ctx->part = part_pool == NULL ? NULL :
 		p_new(part_pool, struct message_part, 1);
@@ -677,12 +684,13 @@
 
 struct message_parser_ctx *
 message_parser_init_from_parts(struct message_part *parts,
-			       struct istream *input, bool return_body_blocks)
+			       struct istream *input,
+			       enum message_header_parser_flags hdr_flags,
+			       enum message_parser_flags flags)
 {
 	struct message_parser_ctx *ctx;
 
-	ctx = message_parser_init(NULL, input);
-	ctx->return_body_blocks = return_body_blocks;
+	ctx = message_parser_init(NULL, input, hdr_flags, flags);
 	ctx->parts = ctx->part = parts;
 	ctx->parse_next_block = preparsed_parse_next_header_init;
 	return ctx;
--- a/src/lib-mail/message-parser.h	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-mail/message-parser.h	Wed Apr 04 12:27:30 2007 +0300
@@ -4,6 +4,10 @@
 #include "message-header-parser.h"
 #include "message-size.h"
 
+enum message_parser_flags {
+	MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK	= 0x01
+};
+
 /* Note that these flags are used directly by message-parser-serialize, so
    existing flags can't be changed without breaking backwards compatibility */
 enum message_part_flags {
@@ -60,11 +64,15 @@
 /* Initialize message parser. part_spool specifies where struct message_parts
    are allocated from. */
 struct message_parser_ctx *
-message_parser_init(pool_t part_pool, struct istream *input);
+message_parser_init(pool_t part_pool, struct istream *input,
+		    enum message_header_parser_flags hdr_flags,
+		    enum message_parser_flags flags);
 /* Use preparsed parts to speed up parsing. */
 struct message_parser_ctx *
 message_parser_init_from_parts(struct message_part *parts,
-			       struct istream *input, bool return_body_blocks);
+			       struct istream *input,
+			       enum message_header_parser_flags hdr_flags,
+			       enum message_parser_flags flags);
 struct message_part *message_parser_deinit(struct message_parser_ctx **ctx);
 
 /* Read the next block of a message. Returns 1 if block is returned, 0 if
--- a/src/lib-storage/index/index-mail-headers.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-storage/index/index-mail-headers.c	Wed Apr 04 12:27:30 2007 +0300
@@ -24,6 +24,12 @@
 	const char **name;
 };
 
+static const enum message_header_parser_flags hdr_parser_flags =
+	MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP |
+	MESSAGE_HEADER_PARSER_FLAG_DROP_CR;
+static const enum message_parser_flags msg_parser_flags =
+	MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK;
+
 static int header_line_cmp(const void *p1, const void *p2)
 {
 	const struct index_mail_line *l1 = p1, *l2 = p2;
@@ -346,6 +352,18 @@
 	index_mail_parse_header(mail->data.parts, hdr, mail);
 }
 
+void index_mail_cache_parse_init(struct mail *_mail, struct istream *input)
+{
+	struct index_mail *mail = (struct index_mail *)_mail;
+
+	i_assert(mail->data.parser_ctx == NULL);
+
+	index_mail_parse_header_init(mail, NULL);
+	mail->data.parser_ctx =
+		message_parser_init(mail->data_pool, input,
+				    hdr_parser_flags, msg_parser_flags);
+}
+
 static void index_mail_init_parser(struct index_mail *mail)
 {
 	struct index_mail_data *data = &mail->data;
@@ -354,12 +372,16 @@
 		(void)message_parser_deinit(&data->parser_ctx);
 
 	if (data->parts == NULL) {
-		data->parser_ctx =
-			message_parser_init(mail->data_pool, data->stream);
+		data->parser_ctx = message_parser_init(mail->data_pool,
+						       data->stream,
+						       hdr_parser_flags,
+						       msg_parser_flags);
 	} else {
 		data->parser_ctx =
 			message_parser_init_from_parts(data->parts,
-						       data->stream, FALSE);
+						       data->stream,
+						       hdr_parser_flags,
+						       msg_parser_flags);
 	}
 }
 
@@ -387,6 +409,7 @@
 	} else {
 		/* just read the header */
 		message_parse_header(data->stream, &data->hdr_size,
+				     hdr_parser_flags,
 				     index_mail_parse_header_cb, mail);
 	}
 	data->hdr_size_set = TRUE;
@@ -423,7 +446,7 @@
 	if (mail->data.envelope == NULL && stream != NULL) {
 		/* we got the headers from cache - parse them to get the
 		   envelope */
-		message_parse_header(stream, NULL,
+		message_parse_header(stream, NULL, hdr_parser_flags,
 				     imap_envelope_parse_callback, mail);
 		mail->data.save_envelope = FALSE;
 	}
--- a/src/lib-storage/index/index-mail.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-storage/index/index-mail.c	Wed Apr 04 12:27:30 2007 +0300
@@ -1050,16 +1050,6 @@
 	pool_unref(mail->mail.pool);
 }
 
-void index_mail_cache_parse_init(struct mail *_mail, struct istream *input)
-{
-	struct index_mail *mail = (struct index_mail *)_mail;
-
-	i_assert(mail->data.parser_ctx == NULL);
-
-	index_mail_parse_header_init(mail, NULL);
-	mail->data.parser_ctx = message_parser_init(mail->data_pool, input);
-}
-
 void index_mail_cache_parse_continue(struct mail *_mail)
 {
 	struct index_mail *mail = (struct index_mail *)_mail;
--- a/src/lib-storage/index/index-search.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-storage/index/index-search.c	Wed Apr 04 12:27:30 2007 +0300
@@ -66,6 +66,9 @@
 	struct message_body_search_context *body_search_ctx;
 };
 
+static const enum message_header_parser_flags hdr_parser_flags =
+	MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE;
+
 static int search_parse_msgset_args(struct index_mailbox *ibox,
 				    const struct mail_index_header *hdr,
 				    struct mail_search_arg *args,
@@ -602,7 +605,8 @@
 
 		if (hdr_ctx.parse_headers)
 			index_mail_parse_header_init(ctx->imail, headers_ctx);
-		message_parse_header(input, NULL, search_header, &hdr_ctx);
+		message_parse_header(input, NULL, hdr_parser_flags,
+				     search_header, &hdr_ctx);
 		if (headers_ctx != NULL)
 			mailbox_header_lookup_deinit(&headers_ctx);
 	} else {
--- a/src/lib-storage/index/mbox/mbox-sync-parse.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/lib-storage/index/mbox/mbox-sync-parse.c	Wed Apr 04 12:27:30 2007 +0300
@@ -476,7 +476,7 @@
         mbox_md5_ctx = mbox_md5_init();
 
         line_start_pos = 0;
-	hdr_ctx = message_parse_header_init(input, NULL, FALSE);
+	hdr_ctx = message_parse_header_init(input, NULL, 0);
 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
 		if (hdr->eoh) {
 			ctx->have_eoh = TRUE;
@@ -570,7 +570,7 @@
 	memset(&ctx, 0, sizeof(ctx));
         mbox_md5_ctx = mbox_md5_init();
 
-	hdr_ctx = message_parse_header_init(mbox->mbox_stream, NULL, FALSE);
+	hdr_ctx = message_parse_header_init(mbox->mbox_stream, NULL, 0);
 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
 		if (hdr->eoh)
 			break;
--- a/src/plugins/fts/fts-storage.c	Wed Apr 04 11:57:55 2007 +0300
+++ b/src/plugins/fts/fts-storage.c	Wed Apr 04 12:27:30 2007 +0300
@@ -173,7 +173,9 @@
 		return -1;
 
 	prev_part = skip_part = NULL;
-	parser = message_parser_init(pool_datastack_create(), input);
+	parser = message_parser_init(pool_datastack_create(), input,
+				     MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE,
+				     0);
 	decoder = message_decoder_init_ucase();
 	for (;;) {
 		ret = message_parser_parse_next_block(parser, &raw_block);