changeset 8004:6418d4c0ff16 HEAD

fts: Added a new lookup API where the backend can look up all the fields using a single query. Implemented it in fts-solr.
author Timo Sirainen <tss@iki.fi>
date Sun, 13 Jul 2008 18:04:19 +0300
parents d6d7be735b40
children 67eb95aa7293
files src/plugins/fts-lucene/fts-backend-lucene.c src/plugins/fts-solr/fts-backend-solr.c src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts/fts-api-private.h src/plugins/fts/fts-api.c src/plugins/fts/fts-api.h src/plugins/fts/fts-search.c src/plugins/fts/fts-storage.h
diffstat 8 files changed, 231 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Sun Jul 13 18:04:19 2008 +0300
@@ -216,6 +216,7 @@
 		fts_backend_lucene_lock,
 		fts_backend_lucene_unlock,
 		fts_backend_lucene_lookup,
+		NULL,
 		NULL
 	}
 };
--- a/src/plugins/fts-solr/fts-backend-solr.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-solr/fts-backend-solr.c	Sun Jul 13 18:04:19 2008 +0300
@@ -255,37 +255,50 @@
 {
 }
 
-static int
-fts_backend_solr_lookup(struct fts_backend *backend, const char *key,
-			enum fts_lookup_flags flags,
-			ARRAY_TYPE(seq_range) *definite_uids,
-			ARRAY_TYPE(seq_range) *maybe_uids)
+static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx,
+				   ARRAY_TYPE(seq_range) *definite_uids,
+				   ARRAY_TYPE(seq_range) *maybe_uids)
 {
+	struct mailbox *box = ctx->backend->box;
+	const struct fts_backend_lookup_field *fields;
+	unsigned int i, count;
 	struct mailbox_status status;
 	string_t *str;
 
-	i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0);
+	mailbox_get_status(box, STATUS_UIDVALIDITY, &status);
 
 	str = t_str_new(256);
-	str_append(str, "fl=uid&q=");
-	if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
-		/* body only */
-		i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0);
-		str_append(str, "body:");
-	} else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) {
-		/* header only */
-		str_append(str, "hdr:");
-	} else {
-		/* both */
-		str_append(str, "any:");
+	str_printfa(str, "fl=uid&rows=%u&q=", status.uidnext);
+
+	/* build a lucene search query from the fields */
+	fields = array_get(&ctx->fields, &count);
+	for (i = 0; i < count; i++) {
+		if (i > 0)
+			str_append(str, "%20");
+
+		if ((fields[i].flags & FTS_LOOKUP_FLAG_INVERT) != 0)
+			str_append_c(str, '-');
+
+		if ((fields[i].flags & FTS_LOOKUP_FLAG_HEADER) == 0) {
+			/* body only */
+			i_assert((fields[i].flags & FTS_LOOKUP_FLAG_BODY) != 0);
+			str_append(str, "body:");
+		} else if ((fields[i].flags & FTS_LOOKUP_FLAG_BODY) == 0) {
+			/* header only */
+			str_append(str, "hdr:");
+		} else {
+			/* both */
+			str_append(str, "any:");
+		}
+		solr_quote_str(str, fields[i].key);
 	}
-	solr_quote_str(str, key);
 
-	mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status);
-	str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity);
-	solr_quote_str(str, backend->box->name);
+	/* use a separate filter query for selecting the mailbox. it shouldn't
+	   affect the score and there could be some caching benefits too. */
+	str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity);
+	solr_quote_str(str, box->name);
 	str_append(str, "%20user:");
-	solr_quote_str(str, backend->box->storage->user);
+	solr_quote_str(str, box->storage->user);
 
 	array_clear(maybe_uids);
 	return solr_connection_select(solr_conn, str_c(str), definite_uids);
@@ -306,7 +319,8 @@
 		fts_backend_solr_expunge_finish,
 		fts_backend_solr_lock,
 		fts_backend_solr_unlock,
-		fts_backend_solr_lookup,
-		NULL
+		NULL,
+		NULL,
+		fts_backend_solr_lookup
 	}
 };
--- a/src/plugins/fts-squat/fts-backend-squat.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Sun Jul 13 18:04:19 2008 +0300
@@ -252,6 +252,7 @@
 		fts_backend_squat_lock,
 		fts_backend_squat_unlock,
 		fts_backend_squat_lookup,
+		NULL,
 		NULL
 	}
 };
--- a/src/plugins/fts/fts-api-private.h	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-api-private.h	Sun Jul 13 18:04:19 2008 +0300
@@ -30,6 +30,10 @@
 		      enum fts_lookup_flags flags,
 		      ARRAY_TYPE(seq_range) *definite_uids,
 		      ARRAY_TYPE(seq_range) *maybe_uids);
+
+	int (*lookup2)(struct fts_backend_lookup_context *ctx,
+		       ARRAY_TYPE(seq_range) *definite_uids,
+		       ARRAY_TYPE(seq_range) *maybe_uids);
 };
 
 enum fts_backend_flags {
@@ -56,7 +60,24 @@
 	unsigned int failed:1;
 };
 
+struct fts_backend_lookup_field {
+	const char *key;
+	enum fts_lookup_flags flags;
+};
+
+struct fts_backend_lookup_context {
+	struct fts_backend *backend;
+	pool_t pool;
+
+	ARRAY_DEFINE(fields, struct fts_backend_lookup_field);
+};
+
 void fts_backend_register(const struct fts_backend *backend);
 void fts_backend_unregister(const char *name);
 
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+		     const ARRAY_TYPE(seq_range) *definite_filter,
+		     ARRAY_TYPE(seq_range) *maybe_dest,
+		     const ARRAY_TYPE(seq_range) *maybe_filter);
+
 #endif
--- a/src/plugins/fts/fts-api.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-api.c	Sun Jul 13 18:04:19 2008 +0300
@@ -144,33 +144,6 @@
 	backend->v.unlock(backend);
 }
 
-static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids,
-			      const ARRAY_TYPE(seq_range) *maybe_uids)
-{
-	/* we'll begin by inverting definite UIDs */
-	seq_range_array_invert(definite_uids, 1, (uint32_t)-1);
-
-	/* from that list remove UIDs in the maybe list.
-	   the maybe list itself isn't touched. */
-	(void)seq_range_array_remove_seq_range(definite_uids, maybe_uids);
-}
-
-int fts_backend_lookup(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids)
-{
-	int ret;
-
-	ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT,
-				definite_uids, maybe_uids);
-	if (unlikely(ret < 0))
-		return -1;
-	if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0)
-		fts_lookup_invert(definite_uids, maybe_uids);
-	return 0;
-}
-
 static void
 fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
 		  const ARRAY_TYPE(seq_range) *dest_definite,
@@ -206,10 +179,51 @@
 	}
 }
 
-int fts_backend_filter(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids)
+void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
+		     const ARRAY_TYPE(seq_range) *definite_filter,
+		     ARRAY_TYPE(seq_range) *maybe_dest,
+		     const ARRAY_TYPE(seq_range) *maybe_filter)
+{
+	T_BEGIN {
+		fts_merge_maybies(maybe_dest, definite_dest,
+				  maybe_filter, definite_filter);
+	} T_END;
+	/* keep only what exists in both lists. the rest is in
+	   maybies or not wanted */
+	seq_range_array_intersect(definite_dest, definite_filter);
+}
+
+static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids,
+			      const ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	/* we'll begin by inverting definite UIDs */
+	seq_range_array_invert(definite_uids, 1, (uint32_t)-1);
+
+	/* from that list remove UIDs in the maybe list.
+	   the maybe list itself isn't touched. */
+	(void)seq_range_array_remove_seq_range(definite_uids, maybe_uids);
+}
+
+static int fts_backend_lookup(struct fts_backend *backend, const char *key,
+			      enum fts_lookup_flags flags,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	int ret;
+
+	ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT,
+				definite_uids, maybe_uids);
+	if (unlikely(ret < 0))
+		return -1;
+	if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0)
+		fts_lookup_invert(definite_uids, maybe_uids);
+	return 0;
+}
+
+static int fts_backend_filter(struct fts_backend *backend, const char *key,
+			      enum fts_lookup_flags flags,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids)
 {
 	ARRAY_TYPE(seq_range) tmp_definite, tmp_maybe;
 	int ret;
@@ -228,15 +242,72 @@
 		array_clear(definite_uids);
 		array_clear(maybe_uids);
 	} else {
-		T_BEGIN {
-			fts_merge_maybies(maybe_uids, definite_uids,
-					  &tmp_maybe, &tmp_definite);
-		} T_END;
-		/* keep only what exists in both lists. the rest is in
-		   maybies or not wanted */
-		seq_range_array_intersect(definite_uids, &tmp_definite);
+		fts_filter_uids(definite_uids, &tmp_definite,
+				maybe_uids, &tmp_maybe);
 	}
 	array_free(&tmp_maybe);
 	array_free(&tmp_definite);
 	return ret;
 }
+
+struct fts_backend_lookup_context *
+fts_backend_lookup_init(struct fts_backend *backend)
+{
+	struct fts_backend_lookup_context *ctx;
+	pool_t pool;
+
+	pool = pool_alloconly_create("fts backend lookup", 256);
+	ctx = p_new(pool, struct fts_backend_lookup_context, 1);
+	ctx->pool = pool;
+	ctx->backend = backend;
+	p_array_init(&ctx->fields, pool, 8);
+	return ctx;
+}
+
+void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx,
+			    const char *key, enum fts_lookup_flags flags)
+{
+	struct fts_backend_lookup_field *field;
+
+	field = array_append_space(&ctx->fields);
+	field->key = p_strdup(ctx->pool, key);
+	field->flags = flags;
+}
+
+static int fts_backend_lookup_old(struct fts_backend_lookup_context *ctx,
+				  ARRAY_TYPE(seq_range) *definite_uids,
+				  ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	const struct fts_backend_lookup_field *fields;
+	unsigned int i, count;
+
+	fields = array_get(&ctx->fields, &count);
+	i_assert(count > 0);
+
+	if (fts_backend_lookup(ctx->backend, fields[0].key, fields[0].flags,
+			       definite_uids, maybe_uids) < 0)
+		return -1;
+	for (i = 1; i < count; i++) {
+		if (fts_backend_filter(ctx->backend,
+				       fields[i].key, fields[i].flags,
+				       definite_uids, maybe_uids) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+int fts_backend_lookup_deinit(struct fts_backend_lookup_context **_ctx,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids)
+{
+	struct fts_backend_lookup_context *ctx = *_ctx;
+	int ret;
+
+	*_ctx = NULL;
+	if (ctx->backend->v.lookup2 != NULL)
+		ret = ctx->backend->v.lookup2(ctx, definite_uids, maybe_uids);
+	else
+		ret = fts_backend_lookup_old(ctx, definite_uids, maybe_uids);
+	pool_unref(&ctx->pool);
+	return ret;
+}
--- a/src/plugins/fts/fts-api.h	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-api.h	Sun Jul 13 18:04:19 2008 +0300
@@ -52,17 +52,15 @@
 int fts_backend_lock(struct fts_backend *backend);
 void fts_backend_unlock(struct fts_backend *backend);
 
-/* Lookup key from the index and return the found UIDs in result. */
-int fts_backend_lookup(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids);
-/* Drop UIDs from the result list for which the key doesn't exist. The idea
-   is that with multiple search keywords you first lookup one and then filter
-   the rest. */
-int fts_backend_filter(struct fts_backend *backend, const char *key,
-		       enum fts_lookup_flags flags,
-		       ARRAY_TYPE(seq_range) *definite_uids,
-		       ARRAY_TYPE(seq_range) *maybe_uids);
+/* Start building a FTS lookup. */
+struct fts_backend_lookup_context *
+fts_backend_lookup_init(struct fts_backend *backend);
+/* Add a new search key to the lookup. */
+void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx,
+			    const char *key, enum fts_lookup_flags flags);
+/* Finish the lookup and return found UIDs. */
+int fts_backend_lookup_deinit(struct fts_backend_lookup_context **ctx,
+			      ARRAY_TYPE(seq_range) *definite_uids,
+			      ARRAY_TYPE(seq_range) *maybe_uids);
 
 #endif
--- a/src/plugins/fts/fts-search.c	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-search.c	Sun Jul 13 18:04:19 2008 +0300
@@ -42,9 +42,10 @@
 }
 
 static int fts_search_lookup_arg(struct fts_search_context *fctx,
-				 struct mail_search_arg *arg, bool filter)
+				 struct mail_search_arg *arg)
 {
 	struct fts_backend *backend;
+	struct fts_backend_lookup_context **lookup_ctx_p;
 	enum fts_lookup_flags flags = 0;
 	const char *key;
 	string_t *key_utf8;
@@ -78,7 +79,6 @@
 		break;
 	default:
 		/* can't filter this */
-		i_assert(filter);
 		return 0;
 	}
 	if (arg->not)
@@ -96,14 +96,16 @@
 		ret = 0;
 	} else if (!backend->locked && fts_backend_lock(backend) <= 0)
 		ret = -1;
-	else if (!filter) {
-		ret = fts_backend_lookup(backend, str_c(key_utf8), flags,
-					 &fctx->definite_seqs,
-					 &fctx->maybe_seqs);
-	} else {
-		ret = fts_backend_filter(backend, str_c(key_utf8), flags,
-					 &fctx->definite_seqs,
-					 &fctx->maybe_seqs);
+	else {
+		ret = 0;
+		if (backend == fctx->fbox->backend_substr)
+			lookup_ctx_p = &fctx->lookup_ctx_substr;
+		else
+			lookup_ctx_p = &fctx->lookup_ctx_fast;
+
+		if (*lookup_ctx_p == NULL)
+			*lookup_ctx_p = fts_backend_lookup_init(backend);
+		fts_backend_lookup_add(*lookup_ctx_p, str_c(key_utf8), flags);
 	}
 	return ret;
 }
@@ -111,6 +113,7 @@
 void fts_search_lookup(struct fts_search_context *fctx)
 {
 	struct mail_search_arg *arg;
+	bool have_seqs;
 	int ret;
 
 	if (fctx->best_arg == NULL)
@@ -119,25 +122,53 @@
 	i_array_init(&fctx->definite_seqs, 64);
 	i_array_init(&fctx->maybe_seqs, 64);
 
-	/* start filtering with the best arg */
+	/* start lookup with the best arg */
 	T_BEGIN {
-		ret = fts_search_lookup_arg(fctx, fctx->best_arg, FALSE);
+		ret = fts_search_lookup_arg(fctx, fctx->best_arg);
 	} T_END;
 	/* filter the rest */
 	for (arg = fctx->args->args; arg != NULL && ret == 0; arg = arg->next) {
 		if (arg != fctx->best_arg) {
 			T_BEGIN {
-				ret = fts_search_lookup_arg(fctx, arg, TRUE);
+				ret = fts_search_lookup_arg(fctx, arg);
 			} T_END;
 		}
 	}
 
-	if (fctx->fbox->backend_fast != NULL &&
-	    fctx->fbox->backend_fast->locked)
-		fts_backend_unlock(fctx->fbox->backend_fast);
-	if (fctx->fbox->backend_substr != NULL &&
-	    fctx->fbox->backend_substr->locked)
-		fts_backend_unlock(fctx->fbox->backend_substr);
+	have_seqs = FALSE;
+	if (fctx->fbox->backend_fast != NULL) {
+		if (fctx->lookup_ctx_fast != NULL) {
+			have_seqs = TRUE;
+			fts_backend_lookup_deinit(&fctx->lookup_ctx_fast,
+						  &fctx->definite_seqs,
+						  &fctx->maybe_seqs);
+		}
+		if (fctx->fbox->backend_fast->locked)
+			fts_backend_unlock(fctx->fbox->backend_fast);
+	}
+	if (fctx->fbox->backend_substr != NULL) {
+		if (fctx->lookup_ctx_substr == NULL) {
+			/* no substr lookups */
+		} else if (!have_seqs) {
+			fts_backend_lookup_deinit(&fctx->lookup_ctx_substr,
+						  &fctx->definite_seqs,
+						  &fctx->maybe_seqs);
+		} else {
+			/* have to merge the results */
+			ARRAY_TYPE(seq_range) tmp_def, tmp_maybe;
+
+			i_array_init(&tmp_def, 64);
+			i_array_init(&tmp_maybe, 64);
+			fts_backend_lookup_deinit(&fctx->lookup_ctx_substr,
+						  &tmp_def, &tmp_maybe);
+			fts_filter_uids(&fctx->definite_seqs, &tmp_def,
+					&fctx->maybe_seqs, &tmp_maybe);
+			array_free(&tmp_def);
+			array_free(&tmp_maybe);
+		}
+		if (fctx->fbox->backend_substr->locked)
+			fts_backend_unlock(fctx->fbox->backend_substr);
+	}
 
 	if (ret == 0) {
 		fctx->seqs_set = TRUE;
--- a/src/plugins/fts/fts-storage.h	Sun Jul 13 15:37:19 2008 +0300
+++ b/src/plugins/fts/fts-storage.h	Sun Jul 13 18:04:19 2008 +0300
@@ -18,6 +18,7 @@
 	struct mail_search_args *args;
 	struct mail_search_arg *best_arg;
 
+	struct fts_backend_lookup_context *lookup_ctx_substr, *lookup_ctx_fast;
 	ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs;
 	unsigned int definite_idx, maybe_idx;