changeset 5504:9414946e1eb0 HEAD

Message body search API changed to init/search/deinit. The search context is now built only once per search instead of being rebuilt for every message.
author Timo Sirainen <tss@iki.fi>
date Tue, 03 Apr 2007 17:51:26 +0300
parents b7573b83f999
children d0caa9a44d1c
files src/lib-mail/message-body-search.c src/lib-mail/message-body-search.h src/lib-storage/index/index-search.c
diffstat 3 files changed, 142 insertions(+), 118 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/message-body-search.c	Tue Apr 03 16:55:08 2007 +0300
+++ b/src/lib-mail/message-body-search.c	Tue Apr 03 17:51:26 2007 +0300
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2007 Timo Sirainen */
 
 #include "lib.h"
 #include "base64.h"
@@ -14,19 +14,20 @@
 
 #define DECODE_BLOCK_SIZE 8192
 
-struct body_search_context {
+struct message_body_search_context {
 	pool_t pool;
 
-	const char *key;
-	size_t key_len;
+	char *key;
+	char *charset;
+	unsigned int key_len;
 
-	const char *charset;
+	struct header_search_context *hdr_search_ctx;
 	unsigned int unknown_charset:1;
 	unsigned int search_header:1;
 };
 
 struct part_search_context {
-	struct body_search_context *body_ctx;
+	struct message_body_search_context *body_ctx;
 
 	struct charset_translation *translation;
 
@@ -103,18 +104,13 @@
 				  struct istream *input,
 				  const struct message_part *part)
 {
-	struct header_search_context *hdr_search_ctx;
+	struct header_search_context *hdr_search_ctx =
+		ctx->body_ctx->hdr_search_ctx;
 	struct message_header_parser_ctx *hdr_ctx;
 	struct message_header_line *hdr;
 	int ret;
 	bool found = FALSE;
 
-	hdr_search_ctx = message_header_search_init(pool_datastack_create(),
-						    ctx->body_ctx->key,
-						    "UTF-8", NULL);
-	/* Our key is in UTF-8. It can't be invalid. */
-	i_assert(hdr_search_ctx != NULL);
-
 	/* we default to text content-type */
 	ctx->content_type_text = TRUE;
 
@@ -122,6 +118,8 @@
 				      part->header_size.physical_size);
 	i_stream_seek(input, 0);
 
+	message_header_search_reset(hdr_search_ctx);
+
 	hdr_ctx = message_parse_header_init(input, NULL, TRUE);
 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
 		if (hdr->eoh)
@@ -170,7 +168,8 @@
 					 buffer_t *block)
 {
 	const unsigned char *p, *end, *key;
-	size_t key_len, block_size, *matches, match_count, value;
+	unsigned int key_len;
+	size_t block_size, *matches, match_count, value;
 	ssize_t i;
 
 	key = (const unsigned char *) ctx->body_ctx->key;
@@ -360,41 +359,54 @@
 	return found;
 }
 
-static bool
-message_body_search_init(struct body_search_context *ctx,
-			 const char *key, const char *charset,
-			 bool *unknown_charset_r, bool search_header)
+int message_body_search_init(pool_t pool, const char *key, const char *charset,
+			     bool search_header,
+			     struct message_body_search_context **ctx_r)
 {
+	struct message_body_search_context *ctx;
+	bool unknown_charset;
 	size_t key_len;
 
-	memset(ctx, 0, sizeof(struct body_search_context));
-
 	/* get the key uppercased */
-	key = charset_to_ucase_utf8_string(charset, unknown_charset_r,
-					   (const unsigned char *) key,
+	t_push();
+	key = charset_to_ucase_utf8_string(charset, &unknown_charset,
+					   (const unsigned char *)key,
 					   strlen(key), &key_len);
-	if (key == NULL)
-		return FALSE;
+	if (key == NULL) {
+		t_pop();
+		return unknown_charset ? 0 : -1;
+	}
 
-	ctx->key = key;
+	ctx = *ctx_r = p_new(pool, struct message_body_search_context, 1);
+	ctx->pool = pool;
+	ctx->key = p_strdup(pool, key);
 	ctx->key_len = key_len;
-	ctx->charset = charset;
+	ctx->charset = p_strdup(pool, charset);
 	ctx->unknown_charset = charset == NULL;
-	ctx->search_header = search_header;
+	ctx->hdr_search_ctx = !search_header ? NULL :
+		message_header_search_init(pool, ctx->key, "UTF-8", NULL);
 
-	i_assert(ctx->key_len <= SSIZE_T_MAX/sizeof(size_t));
-
-	return TRUE;
+	t_pop();
+	return 1;
 }
 
-static int message_body_search_ctx(struct body_search_context *ctx,
-				   struct istream *input,
-				   const struct message_part *part)
+void message_body_search_deinit(struct message_body_search_context **_ctx)
+{
+	struct message_body_search_context *ctx = *_ctx;
+
+	*_ctx = NULL;
+	message_header_search_free(&ctx->hdr_search_ctx);
+	p_free(ctx->pool, ctx->key);
+	p_free(ctx->pool, ctx->charset);
+	p_free(ctx->pool, ctx);
+}
+
+int message_body_search(struct message_body_search_context *ctx,
+			struct istream *input, const struct message_part *part)
 {
 	struct part_search_context part_ctx;
-	int ret;
+	int ret = 0;
 
-	ret = 0;
 	while (part != NULL && ret == 0) {
 		i_assert(input->v_offset <= part->physical_pos);
 
@@ -403,7 +415,7 @@
 		memset(&part_ctx, 0, sizeof(part_ctx));
 		part_ctx.body_ctx = ctx;
 		part_ctx.ignore_header =
-			part->parent == NULL && !ctx->search_header;
+			part->parent == NULL && ctx->hdr_search_ctx == NULL;
 
 		t_push();
 
@@ -412,7 +424,7 @@
 			ret = 1;
 		} else if (part->children != NULL) {
 			/* multipart/xxx or message/rfc822 */
-			if (message_body_search_ctx(ctx, input, part->children))
+			if (message_body_search(ctx, input, part->children))
 				ret = 1;
 		} else {
 			if (input->v_offset != part->physical_pos +
@@ -433,25 +445,3 @@
 
 	return ret;
 }
-
-int message_body_search(const char *key, const char *charset,
-			struct istream *input,
-			const struct message_part *part, bool search_header,
-                        enum message_body_search_error *error_r)
-{
-        struct body_search_context ctx;
-	int ret;
-	bool unknown_charset;
-
-	if (!message_body_search_init(&ctx, key, charset, &unknown_charset,
-				      search_header)) {
-		*error_r = unknown_charset ?
-			MESSAGE_BODY_SEARCH_ERROR_UNKNOWN_CHARSET :
-                        MESSAGE_BODY_SEARCH_ERROR_INVALID_KEY;
-		return -1;
-	}
-
-	if ((ret = message_body_search_ctx(&ctx, input, part)) < 0)
-		*error_r = MESSAGE_BODY_SEARCH_ERROR_MESSAGE_PART_BROKEN;
-	return ret;
-}
--- a/src/lib-mail/message-body-search.h	Tue Apr 03 16:55:08 2007 +0300
+++ b/src/lib-mail/message-body-search.h	Tue Apr 03 17:51:26 2007 +0300
@@ -2,22 +2,19 @@
 #define __MESSAGE_BODY_SEARCH_H
 
 struct message_part;
+struct message_body_search_context;
 
-enum message_body_search_error {
-	/* Don't know the given charset. */
-	MESSAGE_BODY_SEARCH_ERROR_UNKNOWN_CHARSET,
-	/* Key contains invalid characters in given charset. */
-	MESSAGE_BODY_SEARCH_ERROR_INVALID_KEY,
-	/* Message_part doesn't match the reality in input stream. */
-	MESSAGE_BODY_SEARCH_ERROR_MESSAGE_PART_BROKEN
-};
+/* Returns 1 if ok, 0 if unknown charset, -1 if key contains invalid characters
+   in given charset. If charset is NULL, the key isn't assumed to be in any
+   specific charset but is compared to message data without any translation. */
+int message_body_search_init(pool_t pool, const char *key, const char *charset,
+			     bool search_header,
+			     struct message_body_search_context **ctx_r);
+void message_body_search_deinit(struct message_body_search_context **ctx);
 
-/* Returns 1 if key is found from input buffer, 0 if not and -1 if error.
-   If charset is NULL, the key isn't assumed to be in any specific charset but
-   is compared to message data without any translation. */
-int message_body_search(const char *key, const char *charset,
-			struct istream *input,
-			const struct message_part *part, bool search_header,
-                        enum message_body_search_error *error_r);
+/* Returns 1 if key is found from input buffer, 0 if not and -1 if message_part
+   is invalid. */
+int message_body_search(struct message_body_search_context *ctx,
+			struct istream *input, const struct message_part *part);
 
 #endif
--- a/src/lib-storage/index/index-search.c	Tue Apr 03 16:55:08 2007 +0300
+++ b/src/lib-storage/index/index-search.c	Tue Apr 03 17:51:26 2007 +0300
@@ -34,7 +34,7 @@
 	struct mail *mail;
 	struct index_mail *imail;
 
-	pool_t hdr_pool;
+	pool_t search_pool;
 	const char *error;
 
 	struct timeval search_start_time, last_notify;
@@ -61,6 +61,11 @@
 	const struct message_part *part;
 };
 
+struct search_arg_context {
+	struct header_search_context *hdr_search_ctx;
+	struct message_body_search_context *body_search_ctx;
+};
+
 static int search_parse_msgset_args(struct index_mailbox *ibox,
 				    const struct mail_index_header *hdr,
 				    struct mail_search_arg *args,
@@ -310,31 +315,72 @@
 	}
 }
 
+static struct search_arg_context *
+search_arg_context(struct index_search_context *ctx,
+		   struct mail_search_arg *arg)
+{
+	struct search_arg_context *arg_ctx = arg->context;
+
+	if (arg_ctx != NULL)
+		return arg_ctx;
+
+	if (ctx->search_pool == NULL)
+		ctx->search_pool = pool_alloconly_create("search pool", 8192);
+
+	arg_ctx = p_new(ctx->search_pool, struct search_arg_context, 1);
+	arg->context = arg_ctx;
+	return arg_ctx;
+}
+
 static struct header_search_context *
 search_header_context(struct index_search_context *ctx,
 		      struct mail_search_arg *arg)
 {
+	struct search_arg_context *arg_ctx;
 	bool unknown_charset;
 
-	if (arg->context != NULL) {
-                message_header_search_reset(arg->context);
-		return arg->context;
+	arg_ctx = search_arg_context(ctx, arg);
+	if (arg_ctx->hdr_search_ctx != NULL) {
+                message_header_search_reset(arg_ctx->hdr_search_ctx);
+		return arg_ctx->hdr_search_ctx;
 	}
 
-	if (ctx->hdr_pool == NULL) {
-		ctx->hdr_pool =
-			pool_alloconly_create("message_header_search", 8192);
-	}
-
-	arg->context = message_header_search_init(ctx->hdr_pool, arg->value.str,
-						  ctx->mail_ctx.charset,
-						  &unknown_charset);
-	if (arg->context == NULL) {
+	arg_ctx->hdr_search_ctx =
+		message_header_search_init(ctx->search_pool, arg->value.str,
+					   ctx->mail_ctx.charset,
+					   &unknown_charset);
+	if (arg_ctx->hdr_search_ctx == NULL) {
 		ctx->error = unknown_charset ?
 			TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
 	}
 
-	return arg->context;
+	return arg_ctx->hdr_search_ctx;
+}
+
+static struct message_body_search_context *
+search_body_context(struct index_search_context *ctx,
+		    struct mail_search_arg *arg)
+{
+	struct search_arg_context *arg_ctx;
+	int ret;
+
+	arg_ctx = search_arg_context(ctx, arg);
+	if (arg_ctx->body_search_ctx != NULL)
+		return arg_ctx->body_search_ctx;
+
+	ret = message_body_search_init(ctx->search_pool, arg->value.str,
+				       ctx->mail_ctx.charset,
+				       arg->type == SEARCH_TEXT ||
+				       arg->type == SEARCH_TEXT_FAST,
+				       &arg_ctx->body_search_ctx);
+	if (ret > 0)
+		return arg_ctx->body_search_ctx;
+
+	if (ret == 0)
+		ctx->error = TXT_UNKNOWN_CHARSET;
+	else
+		ctx->error = TXT_INVALID_SEARCH_KEY;
+	return NULL;
 }
 
 static void search_header_arg(struct mail_search_arg *arg,
@@ -465,9 +511,8 @@
 static void search_body(struct mail_search_arg *arg,
 			struct search_body_context *ctx)
 {
-        enum message_body_search_error error;
+	struct message_body_search_context *body_search_ctx;
 	int ret;
-	bool retry = FALSE;
 
 	if (ctx->index_ctx->error != NULL)
 		return;
@@ -482,36 +527,28 @@
 		return;
 	}
 
-__retry:
+	body_search_ctx = search_body_context(ctx->index_ctx, arg);
+	if (body_search_ctx == NULL) {
+		ARG_SET_RESULT(arg, 0);
+		return;
+	}
+
 	i_stream_seek(ctx->input, 0);
-	ret = message_body_search(arg->value.str,
-				  ctx->index_ctx->mail_ctx.charset,
-				  ctx->input, ctx->part,
-				  arg->type == SEARCH_TEXT, &error);
-
+	ret = message_body_search(body_search_ctx, ctx->input, ctx->part);
 	if (ret < 0) {
-		switch (error) {
-		case MESSAGE_BODY_SEARCH_ERROR_UNKNOWN_CHARSET:
-			ctx->index_ctx->error = TXT_UNKNOWN_CHARSET;
-			break;
-		case MESSAGE_BODY_SEARCH_ERROR_INVALID_KEY:
-			ctx->index_ctx->error = TXT_INVALID_SEARCH_KEY;
-			break;
-		case MESSAGE_BODY_SEARCH_ERROR_MESSAGE_PART_BROKEN:
-			if (retry)
-				i_panic("Couldn't fix broken body structure");
+		mail_cache_set_corrupted(ctx->index_ctx->ibox->cache,
+			"Broken message structure for mail UID %u",
+			ctx->index_ctx->mail->uid);
 
-			mail_cache_set_corrupted(ctx->index_ctx->ibox->cache,
-				"Broken message structure for mail UID %u",
-				ctx->index_ctx->mail->uid);
+		/* get the body parts, and try again */
+		ctx->index_ctx->imail->data.parts = NULL;
+		ctx->part = mail_get_parts(ctx->index_ctx->mail);
 
-			/* get the body parts, and try again */
-			ctx->index_ctx->imail->data.parts = NULL;
-			ctx->part = mail_get_parts(ctx->index_ctx->mail);
-
-			retry = TRUE;
-			goto __retry;
-		}
+		i_stream_seek(ctx->input, 0);
+		ret = message_body_search(body_search_ctx,
+					  ctx->input, ctx->part);
+		if (ret < 0)
+			i_panic("Couldn't fix broken body structure");
 	}
 
 	ARG_SET_RESULT(arg, ret > 0);
@@ -919,8 +956,8 @@
 				       "%s", ctx->error);
 	}
 
-	if (ctx->hdr_pool != NULL)
-		pool_unref(ctx->hdr_pool);
+	if (ctx->search_pool != NULL)
+		pool_unref(ctx->search_pool);
 
 	if (ctx->mail_ctx.sort_program != NULL)
 		index_sort_program_deinit(&ctx->mail_ctx.sort_program);