# HG changeset patch # User Timo Sirainen # Date 1215961459 -10800 # Node ID 6418d4c0ff16b31ba706514b1262bae8f854fceb # Parent d6d7be735b409b9e30ba841e8bdb8fa599e8dcfa fts: Added a new lookup API where the backend can look up all the fields using a single query. Implemented it to fts-solr. diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts-lucene/fts-backend-lucene.c --- a/src/plugins/fts-lucene/fts-backend-lucene.c Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts-lucene/fts-backend-lucene.c Sun Jul 13 18:04:19 2008 +0300 @@ -216,6 +216,7 @@ fts_backend_lucene_lock, fts_backend_lucene_unlock, fts_backend_lucene_lookup, + NULL, NULL } }; diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts-solr/fts-backend-solr.c --- a/src/plugins/fts-solr/fts-backend-solr.c Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts-solr/fts-backend-solr.c Sun Jul 13 18:04:19 2008 +0300 @@ -255,37 +255,50 @@ { } -static int -fts_backend_solr_lookup(struct fts_backend *backend, const char *key, - enum fts_lookup_flags flags, - ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids) +static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) { + struct mailbox *box = ctx->backend->box; + const struct fts_backend_lookup_field *fields; + unsigned int i, count; struct mailbox_status status; string_t *str; - i_assert((flags & FTS_LOOKUP_FLAG_INVERT) == 0); + mailbox_get_status(box, STATUS_UIDVALIDITY, &status); str = t_str_new(256); - str_append(str, "fl=uid&q="); - if ((flags & FTS_LOOKUP_FLAG_HEADER) == 0) { - /* body only */ - i_assert((flags & FTS_LOOKUP_FLAG_BODY) != 0); - str_append(str, "body:"); - } else if ((flags & FTS_LOOKUP_FLAG_BODY) == 0) { - /* header only */ - str_append(str, "hdr:"); - } else { - /* both */ - str_append(str, "any:"); + str_printfa(str, "fl=uid&rows=%u&q=", status.uidnext); + + /* build a lucene search query from the fields */ + fields = array_get(&ctx->fields, &count); + for (i = 0; i < count; i++) { + if (i > 0) + str_append(str, "%20"); + + if ((fields[i].flags & FTS_LOOKUP_FLAG_INVERT) != 0) + str_append_c(str, '-'); + + if ((fields[i].flags & FTS_LOOKUP_FLAG_HEADER) == 0) { + /* body only */ + i_assert((fields[i].flags & FTS_LOOKUP_FLAG_BODY) != 0); + str_append(str, "body:"); + } else if ((fields[i].flags & FTS_LOOKUP_FLAG_BODY) == 0) { + /* header only */ + str_append(str, "hdr:"); + } else { + /* both */ + str_append(str, "any:"); + } + solr_quote_str(str, fields[i].key); } - solr_quote_str(str, key); - mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status); - str_printfa(str, "%%20uidv:%u%%20box:", status.uidvalidity); - solr_quote_str(str, backend->box->name); + /* use a separate filter query for selecting the mailbox. it shouldn't + affect the score and there could be some caching benefits too. */ + str_printfa(str, "&fq=uidv:%u%%20box:", status.uidvalidity); + solr_quote_str(str, box->name); str_append(str, "%20user:"); - solr_quote_str(str, backend->box->storage->user); + solr_quote_str(str, box->storage->user); array_clear(maybe_uids); return solr_connection_select(solr_conn, str_c(str), definite_uids); @@ -306,7 +319,8 @@ fts_backend_solr_expunge_finish, fts_backend_solr_lock, fts_backend_solr_unlock, - fts_backend_solr_lookup, - NULL + NULL, + NULL, + fts_backend_solr_lookup } }; diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts-squat/fts-backend-squat.c --- a/src/plugins/fts-squat/fts-backend-squat.c Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts-squat/fts-backend-squat.c Sun Jul 13 18:04:19 2008 +0300 @@ -252,6 +252,7 @@ fts_backend_squat_lock, fts_backend_squat_unlock, fts_backend_squat_lookup, + NULL, NULL } }; diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts/fts-api-private.h --- a/src/plugins/fts/fts-api-private.h Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts/fts-api-private.h Sun Jul 13 18:04:19 2008 +0300 @@ -30,6 +30,10 @@ enum fts_lookup_flags flags, ARRAY_TYPE(seq_range) *definite_uids, ARRAY_TYPE(seq_range) *maybe_uids); + + int (*lookup2)(struct fts_backend_lookup_context *ctx, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids); }; enum fts_backend_flags { @@ -56,7 +60,24 @@ unsigned int failed:1; }; +struct fts_backend_lookup_field { + const char *key; + enum fts_lookup_flags flags; +}; + +struct fts_backend_lookup_context { + struct fts_backend *backend; + pool_t pool; + + ARRAY_DEFINE(fields, struct fts_backend_lookup_field); +}; + void fts_backend_register(const struct fts_backend *backend); void fts_backend_unregister(const char *name); +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter); + #endif diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts/fts-api.c --- a/src/plugins/fts/fts-api.c Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts/fts-api.c Sun Jul 13 18:04:19 2008 +0300 @@ -144,33 +144,6 @@ backend->v.unlock(backend); } -static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids, - const ARRAY_TYPE(seq_range) *maybe_uids) -{ - /* we'll begin by inverting definite UIDs */ - seq_range_array_invert(definite_uids, 1, (uint32_t)-1); - - /* from that list remove UIDs in the maybe list. - the maybe list itself isn't touched. */ - (void)seq_range_array_remove_seq_range(definite_uids, maybe_uids); -} - -int fts_backend_lookup(struct fts_backend *backend, const char *key, - enum fts_lookup_flags flags, - ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids) -{ - int ret; - - ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT, - definite_uids, maybe_uids); - if (unlikely(ret < 0)) - return -1; - if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0) - fts_lookup_invert(definite_uids, maybe_uids); - return 0; -} - static void fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe, const ARRAY_TYPE(seq_range) *dest_definite, @@ -206,10 +179,51 @@ } } -int fts_backend_filter(struct fts_backend *backend, const char *key, - enum fts_lookup_flags flags, - ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids) +void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest, + const ARRAY_TYPE(seq_range) *definite_filter, + ARRAY_TYPE(seq_range) *maybe_dest, + const ARRAY_TYPE(seq_range) *maybe_filter) +{ + T_BEGIN { + fts_merge_maybies(maybe_dest, definite_dest, + maybe_filter, definite_filter); + } T_END; + /* keep only what exists in both lists. the rest is in + maybies or not wanted */ + seq_range_array_intersect(definite_dest, definite_filter); +} + +static void fts_lookup_invert(ARRAY_TYPE(seq_range) *definite_uids, + const ARRAY_TYPE(seq_range) *maybe_uids) +{ + /* we'll begin by inverting definite UIDs */ + seq_range_array_invert(definite_uids, 1, (uint32_t)-1); + + /* from that list remove UIDs in the maybe list. + the maybe list itself isn't touched. */ + (void)seq_range_array_remove_seq_range(definite_uids, maybe_uids); +} + +static int fts_backend_lookup(struct fts_backend *backend, const char *key, + enum fts_lookup_flags flags, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + int ret; + + ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT, + definite_uids, maybe_uids); + if (unlikely(ret < 0)) + return -1; + if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0) + fts_lookup_invert(definite_uids, maybe_uids); + return 0; +} + +static int fts_backend_filter(struct fts_backend *backend, const char *key, + enum fts_lookup_flags flags, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) { ARRAY_TYPE(seq_range) tmp_definite, tmp_maybe; int ret; @@ -228,15 +242,72 @@ array_clear(definite_uids); array_clear(maybe_uids); } else { - T_BEGIN { - fts_merge_maybies(maybe_uids, definite_uids, - &tmp_maybe, &tmp_definite); - } T_END; - /* keep only what exists in both lists. the rest is in - maybies or not wanted */ - seq_range_array_intersect(definite_uids, &tmp_definite); + fts_filter_uids(definite_uids, &tmp_definite, + maybe_uids, &tmp_maybe); } array_free(&tmp_maybe); array_free(&tmp_definite); return ret; } + +struct fts_backend_lookup_context * +fts_backend_lookup_init(struct fts_backend *backend) +{ + struct fts_backend_lookup_context *ctx; + pool_t pool; + + pool = pool_alloconly_create("fts backend lookup", 256); + ctx = p_new(pool, struct fts_backend_lookup_context, 1); + ctx->pool = pool; + ctx->backend = backend; + p_array_init(&ctx->fields, pool, 8); + return ctx; +} + +void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx, + const char *key, enum fts_lookup_flags flags) +{ + struct fts_backend_lookup_field *field; + + field = array_append_space(&ctx->fields); + field->key = p_strdup(ctx->pool, key); + field->flags = flags; +} + +static int fts_backend_lookup_old(struct fts_backend_lookup_context *ctx, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + const struct fts_backend_lookup_field *fields; + unsigned int i, count; + + fields = array_get(&ctx->fields, &count); + i_assert(count > 0); + + if (fts_backend_lookup(ctx->backend, fields[0].key, fields[0].flags, + definite_uids, maybe_uids) < 0) + return -1; + for (i = 1; i < count; i++) { + if (fts_backend_filter(ctx->backend, + fields[i].key, fields[i].flags, + definite_uids, maybe_uids) < 0) + return -1; + } + return 0; +} + +int fts_backend_lookup_deinit(struct fts_backend_lookup_context **_ctx, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + struct fts_backend_lookup_context *ctx = *_ctx; + int ret; + + *_ctx = NULL; + if (ctx->backend->v.lookup2 != NULL) + ret = ctx->backend->v.lookup2(ctx, definite_uids, maybe_uids); + else + ret = fts_backend_lookup_old(ctx, definite_uids, maybe_uids); + pool_unref(&ctx->pool); + return ret; +} diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts/fts-api.h --- a/src/plugins/fts/fts-api.h Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts/fts-api.h Sun Jul 13 18:04:19 2008 +0300 @@ -52,17 +52,15 @@ int fts_backend_lock(struct fts_backend *backend); void fts_backend_unlock(struct fts_backend *backend); -/* Lookup key from the index and return the found UIDs in result. */ -int fts_backend_lookup(struct fts_backend *backend, const char *key, - enum fts_lookup_flags flags, - ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids); -/* Drop UIDs from the result list for which the key doesn't exist. The idea - is that with multiple search keywords you first lookup one and then filter - the rest. */ -int fts_backend_filter(struct fts_backend *backend, const char *key, - enum fts_lookup_flags flags, - ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids); +/* Start building a FTS lookup. */ +struct fts_backend_lookup_context * +fts_backend_lookup_init(struct fts_backend *backend); +/* Add a new search key to the lookup. */ +void fts_backend_lookup_add(struct fts_backend_lookup_context *ctx, + const char *key, enum fts_lookup_flags flags); +/* Finish the lookup and return found UIDs. */ +int fts_backend_lookup_deinit(struct fts_backend_lookup_context **ctx, + ARRAY_TYPE(seq_range) *definite_uids, + ARRAY_TYPE(seq_range) *maybe_uids); #endif diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts/fts-search.c --- a/src/plugins/fts/fts-search.c Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts/fts-search.c Sun Jul 13 18:04:19 2008 +0300 @@ -42,9 +42,10 @@ } static int fts_search_lookup_arg(struct fts_search_context *fctx, - struct mail_search_arg *arg, bool filter) + struct mail_search_arg *arg) { struct fts_backend *backend; + struct fts_backend_lookup_context **lookup_ctx_p; enum fts_lookup_flags flags = 0; const char *key; string_t *key_utf8; @@ -78,7 +79,6 @@ break; default: /* can't filter this */ - i_assert(filter); return 0; } if (arg->not) @@ -96,14 +96,16 @@ ret = 0; } else if (!backend->locked && fts_backend_lock(backend) <= 0) ret = -1; - else if (!filter) { - ret = fts_backend_lookup(backend, str_c(key_utf8), flags, - &fctx->definite_seqs, - &fctx->maybe_seqs); - } else { - ret = fts_backend_filter(backend, str_c(key_utf8), flags, - &fctx->definite_seqs, - &fctx->maybe_seqs); + else { + ret = 0; + if (backend == fctx->fbox->backend_substr) + lookup_ctx_p = &fctx->lookup_ctx_substr; + else + lookup_ctx_p = &fctx->lookup_ctx_fast; + + if (*lookup_ctx_p == NULL) + *lookup_ctx_p = fts_backend_lookup_init(backend); + fts_backend_lookup_add(*lookup_ctx_p, str_c(key_utf8), flags); } return ret; } @@ -111,6 +113,7 @@ void fts_search_lookup(struct fts_search_context *fctx) { struct mail_search_arg *arg; + bool have_seqs; int ret; if (fctx->best_arg == NULL) @@ -119,25 +122,53 @@ i_array_init(&fctx->definite_seqs, 64); i_array_init(&fctx->maybe_seqs, 64); - /* start filtering with the best arg */ + /* start lookup with the best arg */ T_BEGIN { - ret = fts_search_lookup_arg(fctx, fctx->best_arg, FALSE); + ret = fts_search_lookup_arg(fctx, fctx->best_arg); } T_END; /* filter the rest */ for (arg = fctx->args->args; arg != NULL && ret == 0; arg = arg->next) { if (arg != fctx->best_arg) { T_BEGIN { - ret = fts_search_lookup_arg(fctx, arg, TRUE); + ret = fts_search_lookup_arg(fctx, arg); } T_END; } } - if (fctx->fbox->backend_fast != NULL && - fctx->fbox->backend_fast->locked) - fts_backend_unlock(fctx->fbox->backend_fast); - if (fctx->fbox->backend_substr != NULL && - fctx->fbox->backend_substr->locked) - fts_backend_unlock(fctx->fbox->backend_substr); + have_seqs = FALSE; + if (fctx->fbox->backend_fast != NULL) { + if (fctx->lookup_ctx_fast != NULL) { + have_seqs = TRUE; + fts_backend_lookup_deinit(&fctx->lookup_ctx_fast, + &fctx->definite_seqs, + &fctx->maybe_seqs); + } + if (fctx->fbox->backend_fast->locked) + fts_backend_unlock(fctx->fbox->backend_fast); + } + if (fctx->fbox->backend_substr != NULL) { + if (fctx->lookup_ctx_substr == NULL) { + /* no substr lookups */ + } else if (!have_seqs) { + fts_backend_lookup_deinit(&fctx->lookup_ctx_substr, + &fctx->definite_seqs, + &fctx->maybe_seqs); + } else { + /* have to merge the results */ + ARRAY_TYPE(seq_range) tmp_def, tmp_maybe; + + i_array_init(&tmp_def, 64); + i_array_init(&tmp_maybe, 64); + fts_backend_lookup_deinit(&fctx->lookup_ctx_substr, + &tmp_def, &tmp_maybe); + fts_filter_uids(&fctx->definite_seqs, &tmp_def, + &fctx->maybe_seqs, &tmp_maybe); + array_free(&tmp_def); + array_free(&tmp_maybe); + } + if (fctx->fbox->backend_substr->locked) + fts_backend_unlock(fctx->fbox->backend_substr); + } if (ret == 0) { fctx->seqs_set = TRUE; diff -r d6d7be735b40 -r 6418d4c0ff16 src/plugins/fts/fts-storage.h --- a/src/plugins/fts/fts-storage.h Sun Jul 13 15:37:19 2008 +0300 +++ b/src/plugins/fts/fts-storage.h Sun Jul 13 18:04:19 2008 +0300 @@ -18,6 +18,7 @@ struct mail_search_args *args; struct mail_search_arg *best_arg; + struct fts_backend_lookup_context *lookup_ctx_substr, *lookup_ctx_fast; ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs; unsigned int definite_idx, maybe_idx;