changeset 8484:db78eab2ba5d HEAD

Initial support for getting full text search working nicely with virtual mailboxes.
author Timo Sirainen <tss@iki.fi>
date Sun, 23 Nov 2008 03:59:55 +0200
parents b12705704329
children 739d868c5fe9
files src/lib-storage/index/cydir/cydir-storage.c src/lib-storage/index/dbox/dbox-storage.c src/lib-storage/index/maildir/maildir-storage.c src/lib-storage/index/mbox/mbox-storage.c src/lib-storage/index/raw/raw-storage.c src/lib-storage/mail-storage-private.h src/lib-storage/mail-storage.c src/lib-storage/mail-storage.h src/plugins/fts-solr/fts-backend-solr.c src/plugins/fts-solr/solr-connection.c src/plugins/fts-solr/solr-connection.h src/plugins/fts/fts-storage.c src/plugins/fts/fts-storage.h src/plugins/virtual/virtual-storage.c src/plugins/virtual/virtual-storage.h
diffstat 15 files changed, 205 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-storage/index/cydir/cydir-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/cydir/cydir-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -446,6 +446,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/lib-storage/index/dbox/dbox-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/dbox/dbox-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -717,6 +717,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		dbox_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/lib-storage/index/maildir/maildir-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/maildir/maildir-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -1105,6 +1105,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/lib-storage/index/mbox/mbox-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/mbox/mbox-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -1027,6 +1027,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/lib-storage/index/raw/raw-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/index/raw/raw-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -296,6 +296,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		NULL,
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/lib-storage/mail-storage-private.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage-private.h	Sun Nov 23 03:59:55 2008 +0200
@@ -138,6 +138,10 @@
 	bool (*get_expunged_uids)(struct mailbox *box, uint64_t modseq,
 				  const ARRAY_TYPE(seq_range) *uids,
 				  ARRAY_TYPE(seq_range) *expunged_uids);
+	bool (*get_virtual_uid)(struct mailbox *box,
+				const char *backend_mailbox,
+				uint32_t backend_uidvalidity,
+				uint32_t backend_uid, uint32_t *uid_r);
 
 	struct mail *
 		(*mail_alloc)(struct mailbox_transaction_context *t,
--- a/src/lib-storage/mail-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -630,6 +630,16 @@
 	return box->v.get_expunged_uids(box, modseq, uids, expunged_uids);
 }
 
+bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
+			     uint32_t backend_uidvalidity,
+			     uint32_t backend_uid, uint32_t *uid_r)
+{
+	/* storages that don't provide the hook leave it NULL: nothing found */
+	return box->v.get_virtual_uid != NULL &&
+		box->v.get_virtual_uid(box, backend_mailbox,
+				       backend_uidvalidity, backend_uid, uid_r);
+}
+
 struct mailbox_header_lookup_ctx *
 mailbox_header_lookup_init(struct mailbox *box, const char *const headers[])
 {
--- a/src/lib-storage/mail-storage.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/lib-storage/mail-storage.h	Sun Nov 23 03:59:55 2008 +0200
@@ -416,6 +416,11 @@
 bool mailbox_get_expunged_uids(struct mailbox *box, uint64_t modseq,
 			       const ARRAY_TYPE(seq_range) *uids,
 			       ARRAY_TYPE(seq_range) *expunged_uids);
+/* If box is a virtual mailbox, look up UID for the given backend message.
+   Returns TRUE if found, FALSE if not. */
+bool mailbox_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
+			     uint32_t backend_uidvalidity,
+			     uint32_t backend_uid, uint32_t *uid_r);
 
 /* Initialize header lookup for given headers. */
 struct mailbox_header_lookup_ctx *
--- a/src/plugins/fts-solr/fts-backend-solr.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sun Nov 23 03:59:55 2008 +0200
@@ -11,6 +11,7 @@
 #include <curl/curl.h>
 
 #define SOLR_CMDBUF_SIZE (1024*64)
+#define SOLR_MAX_ROWS 100000
 
 struct solr_fts_backend_build_context {
 	struct fts_backend_build_context ctx;
@@ -97,7 +98,8 @@
 	solr_quote_str(str, backend->box->storage->ns->user->username);
 
 	t_array_init(&uids, 1);
-	if (solr_connection_select(solr_conn, str_c(str), &uids, NULL) < 0)
+	if (solr_connection_select(solr_conn, str_c(str),
+				   NULL, NULL, &uids, NULL) < 0)
 		return -1;
 
 	uidvals = array_get(&uids, &count);
@@ -256,6 +258,15 @@
 {
 }
 
+static bool solr_virtual_uid_map(const char *mailbox, uint32_t uidvalidity,
+				 uint32_t *uid, void *context)
+{
+	struct mailbox *vbox = context;
+	uint32_t backend_uid = *uid; /* in: backend UID, out: virtual UID */
+	return mailbox_get_virtual_uid(vbox, mailbox, uidvalidity,
+				       backend_uid, uid);
+}
+
 static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
 				   ARRAY_TYPE(seq_range) *definite_uids,
 				   ARRAY_TYPE(seq_range) *maybe_uids,
@@ -266,12 +277,20 @@
 	unsigned int i, count;
 	struct mailbox_status status;
 	string_t *str;
+	bool virtual;
 
+	virtual = strcmp(box->storage->name, "virtual") == 0;
 	mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
 
 	str = t_str_new(256);
-	str_printfa(str, "fl=uid,score&rows=%u&sort=uid%%20asc&q=",
-		    status.uidnext);
+	if (!virtual) {
+		str_printfa(str, "fl=uid,score&rows=%u&sort=uid%%20asc&q=",
+			    status.uidnext);
+	} else {
+		str_printfa(str, "fl=uid,score,box,uidv&rows=%u"
+			    "&sort=box%%20asc,uid%%20asc&q=",
+			    SOLR_MAX_ROWS);
+	}
 
 	/* build a lucene search query from the fields */
 	fields = array_get(&ctx->fields, &count);
@@ -298,14 +317,24 @@
 
 	/* use a separate filter query for selecting the mailbox. it shouldn't
 	   affect the score and there could be some caching benefits too. */
-	str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity);
-	solr_quote_str(str, box->name);
-	str_append(str, "%20user:");
+	str_append(str, "&fq=user:");
 	solr_quote_str(str, box->storage->ns->user->username);
 
+	/* FIXME: limit what mailboxes to search with virtual storage */
+	if (!virtual) {
+		str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity);
+		solr_quote_str(str, box->name);
+	}
+
 	array_clear(maybe_uids);
-	return solr_connection_select(solr_conn, str_c(str),
-				      definite_uids, scores);
+	if (!virtual) {
+		return solr_connection_select(solr_conn, str_c(str), NULL, NULL,
+					      definite_uids, scores);
+	} else {
+		return solr_connection_select(solr_conn, str_c(str),
+					      solr_virtual_uid_map, box,
+					      definite_uids, scores);
+	}
 }
 
 struct fts_backend fts_backend_solr = {
--- a/src/plugins/fts-solr/solr-connection.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts-solr/solr-connection.c	Sun Nov 23 03:59:55 2008 +0200
@@ -22,7 +22,9 @@
 enum solr_xml_content_state {
 	SOLR_XML_CONTENT_STATE_NONE = 0,
 	SOLR_XML_CONTENT_STATE_UID,
-	SOLR_XML_CONTENT_STATE_SCORE
+	SOLR_XML_CONTENT_STATE_SCORE,
+	SOLR_XML_CONTENT_STATE_MAILBOX,
+	SOLR_XML_CONTENT_STATE_UIDVALIDITY
 };
 
 struct solr_lookup_xml_context {
@@ -30,8 +32,12 @@
 	enum solr_xml_content_state content_state;
 	int depth;
 
-	uint32_t uid;
+	uint32_t uid, uidvalidity;
 	float score;
+	char *mailbox;
+
+	solr_uid_map_callback_t *callback;
+	void *context;
 
 	ARRAY_TYPE(seq_range) *uids;
 	ARRAY_TYPE(fts_score_map) *scores;
@@ -234,6 +240,8 @@
 			ctx->state++;
 			ctx->uid = 0;
 			ctx->score = 0;
+			i_free_and_null(ctx->mailbox);
+			ctx->uidvalidity = 0;
 		}
 		break;
 	case SOLR_XML_RESPONSE_STATE_DOC:
@@ -242,6 +250,10 @@
 			ctx->content_state = SOLR_XML_CONTENT_STATE_UID;
 		else if (strcmp(name_attr, "score") == 0)
 			ctx->content_state = SOLR_XML_CONTENT_STATE_SCORE;
+		else if (strcmp(name_attr, "box") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_MAILBOX;
+		else if (strcmp(name_attr, "uidv") == 0)
+			ctx->content_state = SOLR_XML_CONTENT_STATE_UIDVALIDITY;
 		else 
 			break;
 		ctx->state++;
@@ -256,10 +268,20 @@
 	struct fts_score_map *score;
 
 	if (ctx->uid == 0) {
-		i_error("fts_solr: missing uid");
+		i_error("fts_solr: Query didn't return uid");
 		return;
 	}
 
+	if (ctx->callback != NULL) {
+		if (ctx->mailbox == NULL) {
+			i_error("fts_solr: Query didn't return mailbox");
+			return;
+		}
+		if (!ctx->callback(ctx->mailbox, ctx->uidvalidity,
+				   &ctx->uid, ctx->context))
+			return;
+	}
+
 	seq_range_array_add(ctx->uids, 0, ctx->uid);
 	if (ctx->scores != NULL && ctx->score != 0) {
 		score = array_append_space(ctx->scores);
@@ -283,36 +305,52 @@
 	ctx->depth--;
 }
 
+/* Parse len decimal digits; returns -1 on a non-digit or uint32 overflow. */
+static int uint32_parse(const char *str, int len, uint32_t *value_r)
+{
+	uint64_t value = 0;
+	int i;
+
+	for (i = 0; i < len; i++) {
+		if (str[i] < '0' || str[i] > '9')
+			return -1;
+		value = value*10 + (str[i]-'0');
+		if (value > 0xffffffffU) /* would wrap in 32 bits */
+			return -1;
+	}
+	*value_r = (uint32_t)value; /* written only on success */
+	return 0;
+}
+
 static void solr_lookup_xml_data(void *context, const char *str, int len)
 {
 	struct solr_lookup_xml_context *ctx = context;
-	uint32_t uid;
-	int i;
 
 	switch (ctx->content_state) {
 	case SOLR_XML_CONTENT_STATE_NONE:
 		break;
 	case SOLR_XML_CONTENT_STATE_UID:
-		for (i = 0, uid = 0; i < len; i++) {
-			if (str[i] < '0' || str[i] > '9')
-				break;
-			uid = uid*10 + str[i]-'0';
-		}
-		if (i != len) {
+		if (uint32_parse(str, len, &ctx->uid) < 0) /* ctx->uid unchanged on failure */
 			i_error("fts_solr: received invalid uid");
-			break;
-		}
-		ctx->uid = uid;
 		break;
 	case SOLR_XML_CONTENT_STATE_SCORE:
 		T_BEGIN {
 			ctx->score = strtod(t_strndup(str, len), NULL);
 		} T_END;
 		break;
+	case SOLR_XML_CONTENT_STATE_MAILBOX: /* content of the "box" field */
+		i_free(ctx->mailbox); /* release any previously stored name */
+		ctx->mailbox = i_strndup(str, len); /* owned copy of len bytes */
+		break;
+	case SOLR_XML_CONTENT_STATE_UIDVALIDITY: /* content of the "uidv" field */
+		if (uint32_parse(str, len, &ctx->uidvalidity) < 0) /* value unchanged on failure */
+			i_error("fts_solr: received invalid uidvalidity");
+		break;
 	}
 }
 
 int solr_connection_select(struct solr_connection *conn, const char *query,
+			   solr_uid_map_callback_t *callback, void *context,
 			   ARRAY_TYPE(seq_range) *uids,
 			   ARRAY_TYPE(fts_score_map) *scores)
 {
@@ -326,6 +364,8 @@
 	memset(&solr_lookup_context, 0, sizeof(solr_lookup_context));
 	solr_lookup_context.uids = uids;
 	solr_lookup_context.scores = scores;
+	solr_lookup_context.callback = callback;
+	solr_lookup_context.context = context;
 
 	i_free_and_null(conn->http_failure);
 	conn->xml_failed = FALSE;
--- a/src/plugins/fts-solr/solr-connection.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts-solr/solr-connection.h	Sun Nov 23 03:59:55 2008 +0200
@@ -4,6 +4,10 @@
 #include "seq-range-array.h"
 #include "fts-api.h"
 
+/* Returns TRUE if UID conversion was done, FALSE if uid should be skipped. */
+typedef bool solr_uid_map_callback_t(const char *mailbox, uint32_t uidvalidity,
+				     uint32_t *uid, void *context);
+
 struct solr_connection *solr_connection_init(const char *url, bool debug);
 void solr_connection_deinit(struct solr_connection *conn);
 
@@ -11,6 +15,7 @@
 			       const char *str);
 
 int solr_connection_select(struct solr_connection *conn, const char *query,
+			   solr_uid_map_callback_t *callback, void *context,
 			   ARRAY_TYPE(seq_range) *uids,
 			   ARRAY_TYPE(fts_score_map) *scores);
 int solr_connection_post(struct solr_connection *conn, const char *cmd);
--- a/src/plugins/fts/fts-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts/fts-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -179,6 +179,11 @@
 	struct fts_backend_build_context *build;
 	uint32_t last_uid, last_uid_locked, seq1, seq2;
 
+	if (fctx->fbox->virtual) {
+		/* FIXME: update all mailboxes */
+		return 0;
+	}
+
 	if (fts_backend_get_last_uid(backend, &last_uid) < 0)
 		return -1;
 
@@ -504,6 +509,29 @@
 	return ret;
 }
 
+static bool /* search_next_update_seq hook installed when the box is virtual */
+fts_mailbox_search_next_update_seq_virtual(struct mail_search_context *ctx)
+{
+	struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box);
+	struct fts_search_context *fctx = FTS_CONTEXT(ctx);
+
+	while (fbox->module_ctx.super.search_next_update_seq(ctx)) {
+		if (!fctx->seqs_set) /* NOTE(review): presumably no FTS seq sets to filter by; confirm */
+			return TRUE;
+
+		/* virtual mailbox searches don't return sequences sorted.
+		   just check if the suggested sequence exists. */
+		if (seq_range_exists(&fctx->definite_seqs, ctx->seq)) {
+			fts_mailbox_search_args_definite_set(fctx);
+			return TRUE;
+		}
+		if (seq_range_exists(&fctx->maybe_seqs, ctx->seq))
+			return TRUE;
+		mail_search_args_reset(ctx->args->args, FALSE); /* seq is in neither set: try the next one */
+	}
+	return FALSE; /* super hook returned FALSE */
+}
+
 static int fts_mailbox_search_deinit(struct mail_search_context *ctx)
 {
 	struct fts_transaction_context *ft = FTS_CONTEXT(ctx->transaction);
@@ -729,12 +757,15 @@
 	struct fts_mailbox *fbox;
 
 	fbox = i_new(struct fts_mailbox, 1);
+	fbox->virtual = strcmp(box->storage->name, "virtual") == 0;
 	fbox->env = env;
 	fbox->module_ctx.super = box->v;
 	box->v.close = fts_mailbox_close;
 	box->v.search_init = fts_mailbox_search_init;
 	box->v.search_next_nonblock = fts_mailbox_search_next_nonblock;
-	box->v.search_next_update_seq = fts_mailbox_search_next_update_seq;
+	box->v.search_next_update_seq = fbox->virtual ?
+		fts_mailbox_search_next_update_seq_virtual :
+		fts_mailbox_search_next_update_seq;
 	box->v.search_deinit = fts_mailbox_search_deinit;
 	box->v.mail_alloc = fts_mail_alloc;
 	box->v.transaction_begin = fts_transaction_begin;
--- a/src/plugins/fts/fts-storage.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/fts/fts-storage.h	Sun Nov 23 03:59:55 2008 +0200
@@ -7,6 +7,7 @@
 	struct fts_backend *backend_fast;
 
 	const char *env;
+	unsigned int virtual:1;
 	unsigned int backend_set:1;
 };
 
--- a/src/plugins/virtual/virtual-storage.c	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/virtual/virtual-storage.c	Sun Nov 23 03:59:55 2008 +0200
@@ -277,6 +277,7 @@
 
 	mbox->storage = storage;
 	mbox->path = p_strdup(pool, path);
+	mbox->vseq_lookup_prev_mailbox = i_strdup("");
 
 	mbox->virtual_ext_id =
 		mail_index_ext_register(index, "virtual", 0,
@@ -353,6 +354,7 @@
 		array_free(&bboxes[i]->uids);
 	}
 	array_free(&mbox->backend_boxes);
+	i_free(mbox->vseq_lookup_prev_mailbox);
 
 	return index_storage_mailbox_close(box) < 0 ? -1 : ret;
 }
@@ -529,6 +531,52 @@
 	return ret;
 }
 
+static int virtual_backend_uidmap_cmp(const void *key, const void *data)
+{
+	const struct virtual_backend_uidmap *entry = data;
+	uint32_t wanted = *(const uint32_t *)key;
+
+	/* bsearch() comparator: key is a real UID, data is a uidmap element */
+	return (wanted > entry->real_uid) - (wanted < entry->real_uid);
+}
+
+static bool
+virtual_get_virtual_uid(struct mailbox *box, const char *backend_mailbox,
+			uint32_t backend_uidvalidity,
+			uint32_t backend_uid, uint32_t *uid_r)
+{
+	struct virtual_mailbox *mbox = (struct virtual_mailbox *)box;
+	struct virtual_backend_box *bbox;
+	struct mailbox_status status;
+	const struct virtual_backend_uidmap *map, *match;
+	unsigned int map_count;
+
+	/* remember the backend box resolved for the previous mailbox name
+	   and redo the name lookup only when the name changes */
+	if (strcmp(mbox->vseq_lookup_prev_mailbox, backend_mailbox) != 0) {
+		i_free(mbox->vseq_lookup_prev_mailbox);
+		mbox->vseq_lookup_prev_mailbox = i_strdup(backend_mailbox);
+		mbox->vseq_lookup_prev_bbox =
+			virtual_backend_box_lookup_name(mbox, backend_mailbox);
+	}
+	bbox = mbox->vseq_lookup_prev_bbox;
+	if (bbox == NULL)
+		return FALSE;
+
+	mailbox_get_status(bbox->box, STATUS_UIDVALIDITY, &status);
+	if (status.uidvalidity != backend_uidvalidity)
+		return FALSE;
+
+	map = array_get(&bbox->uids, &map_count);
+	match = bsearch(&backend_uid, map, map_count, sizeof(*map),
+			virtual_backend_uidmap_cmp);
+	if (match == NULL)
+		return FALSE;
+
+	*uid_r = match->virtual_uid;
+	return TRUE;
+}
+
 static void virtual_class_init(void)
 {
 	virtual_transaction_class_init();
@@ -582,6 +630,7 @@
 		index_storage_get_seq_range,
 		index_storage_get_uid_range,
 		index_storage_get_expunged_uids,
+		virtual_get_virtual_uid,
 		virtual_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_ref,
--- a/src/plugins/virtual/virtual-storage.h	Sun Nov 23 02:40:09 2008 +0200
+++ b/src/plugins/virtual/virtual-storage.h	Sun Nov 23 03:59:55 2008 +0200
@@ -103,6 +103,9 @@
 	uint32_t highest_mailbox_id;
 	uint32_t search_args_crc32;
 
+	char *vseq_lookup_prev_mailbox;
+	struct virtual_backend_box *vseq_lookup_prev_bbox;
+
 	/* Mailboxes this virtual mailbox consists of, sorted by mailbox_id */
 	ARRAY_TYPE(virtual_backend_box) backend_boxes;