Mercurial > dovecot > original-hg > dovecot-1.2
changeset 8006:0a3b5ddb0869 HEAD
Added support for sorting by X-SCORE. Currently it's only set by fts-solr.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 13 Jul 2008 19:09:55 +0300 |
parents | 67eb95aa7293 |
children | b3dd6db685a4 |
files | src/imap/cmd-sort.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-sort.c src/lib-storage/mail-storage.h src/plugins/fts-solr/fts-backend-solr.c src/plugins/fts-solr/solr-connection.c src/plugins/fts-solr/solr-connection.h src/plugins/fts/fts-api-private.h src/plugins/fts/fts-api.c src/plugins/fts/fts-api.h src/plugins/fts/fts-search.c src/plugins/fts/fts-storage.c src/plugins/fts/fts-storage.h |
diffstat | 13 files changed, 227 insertions(+), 34 deletions(-) [+] |
line wrap: on
line diff
--- a/src/imap/cmd-sort.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/imap/cmd-sort.c Sun Jul 13 19:09:55 2008 +0300 @@ -12,15 +12,16 @@ }; static struct sort_name sort_names[] = { - { MAIL_SORT_ARRIVAL, "arrival" }, - { MAIL_SORT_CC, "cc" }, - { MAIL_SORT_DATE, "date" }, - { MAIL_SORT_FROM, "from" }, - { MAIL_SORT_SIZE, "size" }, - { MAIL_SORT_SUBJECT, "subject" }, - { MAIL_SORT_TO, "to" }, + { MAIL_SORT_ARRIVAL, "arrival" }, + { MAIL_SORT_CC, "cc" }, + { MAIL_SORT_DATE, "date" }, + { MAIL_SORT_FROM, "from" }, + { MAIL_SORT_SIZE, "size" }, + { MAIL_SORT_SUBJECT, "subject" }, + { MAIL_SORT_TO, "to" }, + { MAIL_SORT_SEARCH_SCORE, "x-score" }, - { MAIL_SORT_END, NULL } + { MAIL_SORT_END, NULL } }; static int
--- a/src/lib-storage/index/index-mail.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/lib-storage/index/index-mail.c Sun Jul 13 19:09:55 2008 +0300 @@ -1007,6 +1007,7 @@ case MAIL_FETCH_FROM_ENVELOPE: case MAIL_FETCH_UIDL_FILE_NAME: case MAIL_FETCH_UIDL_BACKEND: + case MAIL_FETCH_SEARCH_SCORE: *value_r = ""; return 0; case MAIL_FETCH_HEADER_MD5:
--- a/src/lib-storage/index/index-sort.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/lib-storage/index/index-sort.c Sun Jul 13 19:09:55 2008 +0300 @@ -23,6 +23,12 @@ }; ARRAY_DEFINE_TYPE(mail_sort_node_size, struct mail_sort_node_size); +struct mail_sort_node_float { + uint32_t seq; + float num; +}; +ARRAY_DEFINE_TYPE(mail_sort_node_float, struct mail_sort_node_float); + struct sort_cmp_context { struct mail_search_sort_program *program; struct mail *mail; @@ -74,6 +80,28 @@ node->size = 0; } +static float index_sort_get_score(struct mail *mail) +{ + const char *str; + + if (mail_get_special(mail, MAIL_FETCH_SEARCH_SCORE, &str) < 0) + return 0; + else + return strtod(str, NULL); +} + +static void +index_sort_list_add_score(struct mail_search_sort_program *program, + struct mail *mail) +{ + ARRAY_TYPE(mail_sort_node_float) *nodes = program->context; + struct mail_sort_node_float *node; + + node = array_append_space(nodes); + node->seq = mail->seq; + node->num = index_sort_get_score(mail); +} + void index_sort_list_add(struct mail_search_sort_program *program, struct mail *mail) { @@ -142,6 +170,36 @@ program->context = NULL; } +static int sort_node_float_cmp(const void *p1, const void *p2) +{ + struct sort_cmp_context *ctx = &static_node_cmp_context; + const struct mail_sort_node_float *n1 = p1, *n2 = p2; + + if (n1->num < n2->num) + return !ctx->reverse ? -1 : 1; + if (n1->num > n2->num) + return !ctx->reverse ? 1 : -1; + + return index_sort_node_cmp_type(ctx->mail, + ctx->program->sort_program + 1, + n1->seq, n2->seq); +} + +static void +index_sort_list_finish_float(struct mail_search_sort_program *program) +{ + ARRAY_TYPE(mail_sort_node_float) *nodes = program->context; + struct mail_sort_node_float *float_nodes; + unsigned int count; + + float_nodes = array_get_modifiable(nodes, &count); + qsort(float_nodes, count, sizeof(struct mail_sort_node_float), + sort_node_float_cmp); + memcpy(&program->seqs, nodes, sizeof(program->seqs)); + i_free(nodes); + program->context = NULL; +} + void index_sort_list_finish(struct mail_search_sort_program *program) { memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); @@ -224,6 +282,16 @@ program->sort_list_finish = index_sort_list_finish_string; index_sort_list_init_string(program); break; + case MAIL_SORT_SEARCH_SCORE: { + ARRAY_TYPE(mail_sort_node_float) *nodes; + + nodes = i_malloc(sizeof(*nodes)); + i_array_init(nodes, 128); + program->sort_list_add = index_sort_list_add_score; + program->sort_list_finish = index_sort_list_finish_float; + program->context = nodes; + break; + } default: i_unreached(); } @@ -303,6 +371,7 @@ enum mail_sort_type sort_type; time_t time1, time2; uoff_t size1, size2; + float float1, float2; int ret = 0; sort_type = *sort_program & MAIL_SORT_MASK; @@ -366,6 +435,15 @@ ret = size1 < size2 ? -1 : (size1 > size2 ? 1 : 0); break; + case MAIL_SORT_SEARCH_SCORE: + mail_set_seq(mail, seq1); + float1 = index_sort_get_score(mail); + mail_set_seq(mail, seq2); + float2 = index_sort_get_score(mail); + + ret = float1 < float2 ? -1 : + (float1 > float2 ? 1 : 0); + break; case MAIL_SORT_END: return seq1 < seq2 ? -1 : (seq1 > seq2 ? 1 : 0);
--- a/src/lib-storage/mail-storage.h Sun Jul 13 19:08:13 2008 +0300 +++ b/src/lib-storage/mail-storage.h Sun Jul 13 19:09:55 2008 +0300 @@ -86,7 +86,7 @@ enum mail_sort_type { /* Maximum size for sort program (each one separately + END) */ -#define MAX_SORT_PROGRAM_SIZE (7 + 1) +#define MAX_SORT_PROGRAM_SIZE (8 + 1) MAIL_SORT_ARRIVAL = 0x0001, MAIL_SORT_CC = 0x0002, @@ -95,6 +95,7 @@ MAIL_SORT_SIZE = 0x0010, MAIL_SORT_SUBJECT = 0x0020, MAIL_SORT_TO = 0x0040, + MAIL_SORT_SEARCH_SCORE = 0x0080, MAIL_SORT_MASK = 0x0fff, MAIL_SORT_FLAG_REVERSE = 0x1000, /* reverse this mask type */ @@ -126,7 +127,8 @@ MAIL_FETCH_HEADER_MD5 = 0x00010000, MAIL_FETCH_UIDL_FILE_NAME = 0x00020000, MAIL_FETCH_UIDL_BACKEND = 0x00040000, - MAIL_FETCH_MAILBOX_NAME = 0x00080000 + MAIL_FETCH_MAILBOX_NAME = 0x00080000, + MAIL_FETCH_SEARCH_SCORE = 0x00100000 }; enum mailbox_transaction_flags {
--- a/src/plugins/fts-solr/fts-backend-solr.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts-solr/fts-backend-solr.c Sun Jul 13 19:09:55 2008 +0300 @@ -96,7 +96,7 @@ solr_quote_str(str, backend->box->storage->user); t_array_init(&uids, 1); - if (solr_connection_select(solr_conn, str_c(str), &uids) < 0) + if (solr_connection_select(solr_conn, str_c(str), &uids, NULL) < 0) return -1; uidvals = array_get(&uids, &count); @@ -257,7 +257,8 @@ static int fts_backend_solr_lookup(struct fts_backend_lookup_context *ctx, ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids) + ARRAY_TYPE(seq_range) *maybe_uids, + ARRAY_TYPE(fts_score_map) *scores) { struct mailbox *box = ctx->backend->box; const struct fts_backend_lookup_field *fields; @@ -268,7 +269,8 @@ mailbox_get_status(box, STATUS_UIDVALIDITY, &status); str = t_str_new(256); - str_printfa(str, "fl=uid&rows=%u&q=", status.uidnext); + str_printfa(str, "fl=uid,score&rows=%u&sort=uid%%20asc&q=", + status.uidnext); /* build a lucene search query from the fields */ fields = array_get(&ctx->fields, &count); @@ -301,7 +303,8 @@ solr_quote_str(str, box->storage->user); array_clear(maybe_uids); - return solr_connection_select(solr_conn, str_c(str), definite_uids); + return solr_connection_select(solr_conn, str_c(str), + definite_uids, scores); } struct fts_backend fts_backend_solr = {
--- a/src/plugins/fts-solr/solr-connection.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts-solr/solr-connection.c Sun Jul 13 19:09:55 2008 +0300 @@ -3,6 +3,7 @@ /* curl: 7.16.0 curl_multi_timeout */ #include "lib.h" +#include "array.h" #include "str.h" #include "strescape.h" #include "solr-connection.h" @@ -29,7 +30,11 @@ enum solr_xml_content_state content_state; int depth; + uint32_t uid; + float score; + ARRAY_TYPE(seq_range) *uids; + ARRAY_TYPE(fts_score_map) *scores; }; struct solr_connection_post { @@ -225,8 +230,11 @@ ctx->state++; break; case SOLR_XML_RESPONSE_STATE_RESULT: - if (strcmp(name, "doc") == 0) + if (strcmp(name, "doc") == 0) { ctx->state++; + ctx->uid = 0; + ctx->score = 0; + } break; case SOLR_XML_RESPONSE_STATE_DOC: name_attr = attrs_get_name(attrs); @@ -243,6 +251,23 @@ } } +static void solr_lookup_add_doc(struct solr_lookup_xml_context *ctx) +{ + struct fts_score_map *score; + + if (ctx->uid == 0) { + i_error("fts_solr: missing uid"); + return; + } + + seq_range_array_add(ctx->uids, 0, ctx->uid); + if (ctx->scores != NULL && ctx->score != 0) { + score = array_append_space(ctx->scores); + score->uid = ctx->uid; + score->score = ctx->score; + } +} + static void solr_lookup_xml_end(void *context, const char *name ATTR_UNUSED) { struct solr_lookup_xml_context *ctx = context; @@ -250,6 +275,8 @@ i_assert(ctx->depth >= (int)ctx->state); if (ctx->depth == (int)ctx->state) { + if (ctx->state == SOLR_XML_RESPONSE_STATE_DOC) + solr_lookup_add_doc(ctx); ctx->state--; ctx->content_state = SOLR_XML_CONTENT_STATE_NONE; } @@ -275,16 +302,19 @@ i_error("fts_solr: received invalid uid"); break; } - seq_range_array_add(ctx->uids, 0, uid); + ctx->uid = uid; break; case SOLR_XML_CONTENT_STATE_SCORE: - /* FIXME */ + T_BEGIN { + ctx->score = strtod(t_strndup(str, len), NULL); + } T_END; break; } } int solr_connection_select(struct solr_connection *conn, const char *query, - ARRAY_TYPE(seq_range) *uids) + ARRAY_TYPE(seq_range) *uids, + ARRAY_TYPE(fts_score_map) *scores) { struct solr_lookup_xml_context solr_lookup_context; string_t *str; @@ -295,6 +325,7 @@ memset(&solr_lookup_context, 0, sizeof(solr_lookup_context)); solr_lookup_context.uids = uids; + solr_lookup_context.scores = scores; i_free_and_null(conn->http_failure); conn->xml_failed = FALSE;
--- a/src/plugins/fts-solr/solr-connection.h Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts-solr/solr-connection.h Sun Jul 13 19:09:55 2008 +0300 @@ -2,6 +2,7 @@ #define SOLR_CONNECTION_H #include "seq-range-array.h" +#include "fts-api.h" struct solr_connection *solr_connection_init(const char *url, bool debug); void solr_connection_deinit(struct solr_connection *conn); @@ -10,7 +11,8 @@ const char *str); int solr_connection_select(struct solr_connection *conn, const char *query, - ARRAY_TYPE(seq_range) *uids); + ARRAY_TYPE(seq_range) *uids, + ARRAY_TYPE(fts_score_map) *scores); int solr_connection_post(struct solr_connection *conn, const char *cmd); struct solr_connection_post *
--- a/src/plugins/fts/fts-api-private.h Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-api-private.h Sun Jul 13 19:09:55 2008 +0300 @@ -33,7 +33,8 @@ int (*lookup2)(struct fts_backend_lookup_context *ctx, ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids); + ARRAY_TYPE(seq_range) *maybe_uids, + ARRAY_TYPE(fts_score_map) *scores); }; enum fts_backend_flags {
--- a/src/plugins/fts/fts-api.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-api.c Sun Jul 13 19:09:55 2008 +0300 @@ -298,16 +298,20 @@ int fts_backend_lookup_deinit(struct fts_backend_lookup_context **_ctx, ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids) + ARRAY_TYPE(seq_range) *maybe_uids, + ARRAY_TYPE(fts_score_map) *scores) { struct fts_backend_lookup_context *ctx = *_ctx; int ret; *_ctx = NULL; - if (ctx->backend->v.lookup2 != NULL) - ret = ctx->backend->v.lookup2(ctx, definite_uids, maybe_uids); - else + if (ctx->backend->v.lookup2 != NULL) { + ret = ctx->backend->v.lookup2(ctx, definite_uids, maybe_uids, + scores); + } else { + array_clear(scores); ret = fts_backend_lookup_old(ctx, definite_uids, maybe_uids); + } pool_unref(&ctx->pool); return ret; }
--- a/src/plugins/fts/fts-api.h Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-api.h Sun Jul 13 19:09:55 2008 +0300 @@ -13,6 +13,12 @@ FTS_LOOKUP_FLAG_INVERT = 0x04 }; +struct fts_score_map { + uint32_t uid; + float score; +}; +ARRAY_DEFINE_TYPE(fts_score_map, struct fts_score_map); + struct fts_backend * fts_backend_init(const char *backend_name, struct mailbox *box); void fts_backend_deinit(struct fts_backend **backend); @@ -61,6 +67,7 @@ /* Finish the lookup and return found UIDs. */ int fts_backend_lookup_deinit(struct fts_backend_lookup_context **ctx, ARRAY_TYPE(seq_range) *definite_uids, - ARRAY_TYPE(seq_range) *maybe_uids); + ARRAY_TYPE(seq_range) *maybe_uids, + ARRAY_TYPE(fts_score_map) *scores); #endif
--- a/src/plugins/fts/fts-search.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-search.c Sun Jul 13 19:09:55 2008 +0300 @@ -121,6 +121,7 @@ i_array_init(&fctx->definite_seqs, 64); i_array_init(&fctx->maybe_seqs, 64); + i_array_init(&fctx->score_map, 64); /* start lookup with the best arg */ T_BEGIN { @@ -141,7 +142,8 @@ have_seqs = TRUE; fts_backend_lookup_deinit(&fctx->lookup_ctx_fast, &fctx->definite_seqs, - &fctx->maybe_seqs); + &fctx->maybe_seqs, + &fctx->score_map); } if (fctx->fbox->backend_fast->locked) fts_backend_unlock(fctx->fbox->backend_fast); @@ -152,19 +154,26 @@ } else if (!have_seqs) { fts_backend_lookup_deinit(&fctx->lookup_ctx_substr, &fctx->definite_seqs, - &fctx->maybe_seqs); + &fctx->maybe_seqs, + &fctx->score_map); } else { /* have to merge the results */ ARRAY_TYPE(seq_range) tmp_def, tmp_maybe; + ARRAY_TYPE(fts_score_map) tmp_scores; i_array_init(&tmp_def, 64); i_array_init(&tmp_maybe, 64); + i_array_init(&tmp_scores, 64); + /* FIXME: for now we just ignore the other scores, + since squat doesn't support it anyway */ fts_backend_lookup_deinit(&fctx->lookup_ctx_substr, - &tmp_def, &tmp_maybe); + &tmp_def, &tmp_maybe, + &tmp_scores); fts_filter_uids(&fctx->definite_seqs, &tmp_def, &fctx->maybe_seqs, &tmp_maybe); array_free(&tmp_def); array_free(&tmp_maybe); + array_free(&tmp_scores); } if (fctx->fbox->backend_substr->locked) fts_backend_unlock(fctx->fbox->backend_substr);
--- a/src/plugins/fts/fts-storage.c Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-storage.c Sun Jul 13 19:09:55 2008 +0300 @@ -23,6 +23,11 @@ #define FTS_SEARCH_NONBLOCK_COUNT 10 #define FTS_BUILD_NOTIFY_INTERVAL_SECS 10 +struct fts_mail { + union mail_module_context module_ctx; + char score[30]; +}; + struct fts_storage_build_context { struct mail_search_context *search_ctx; struct mail_search_args *search_args; @@ -39,6 +44,7 @@ union mailbox_transaction_module_context module_ctx; struct fts_storage_build_context *build_ctx; + ARRAY_TYPE(fts_score_map) *score_map; struct mail *mail; uint32_t last_uid; @@ -334,6 +340,7 @@ struct mail_search_args *args, const enum mail_sort_type *sort_program) { + struct fts_transaction_context *ft = FTS_CONTEXT(t); struct fts_mailbox *fbox = FTS_CONTEXT(t->box); struct mail_search_context *ctx; struct fts_search_context *fctx; @@ -349,6 +356,8 @@ if (fbox->backend_substr == NULL && fbox->backend_fast == NULL) return ctx; + ft->score_map = &fctx->score_map; + fts_search_analyze(fctx); (void)fts_try_build_init(fctx); return ctx; @@ -486,9 +495,13 @@ static int fts_mailbox_search_deinit(struct mail_search_context *ctx) { + struct fts_transaction_context *ft = FTS_CONTEXT(ctx->transaction); struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box); struct fts_search_context *fctx = FTS_CONTEXT(ctx); + if (ft->score_map == &fctx->score_map) + ft->score_map = NULL; + if (fctx->build_ctx != NULL) { /* the search was cancelled */ fts_build_deinit(&fctx->build_ctx); @@ -498,6 +511,8 @@ array_free(&fctx->definite_seqs); if (array_is_created(&fctx->maybe_seqs)) array_free(&fctx->maybe_seqs); + if (array_is_created(&fctx->score_map)) + array_free(&fctx->score_map); i_free(fctx); return fbox->module_ctx.super.search_deinit(ctx); } @@ -505,7 +520,7 @@ static void fts_mail_expunge(struct mail *_mail) { struct mail_private *mail = (struct mail_private *)_mail; - union mail_module_context *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); struct fts_mailbox *fbox = FTS_CONTEXT(_mail->box); struct fts_transaction_context *ft = FTS_CONTEXT(_mail->transaction); @@ -515,7 +530,44 @@ if (fbox->backend_fast != NULL) fts_backend_expunge(fbox->backend_fast, _mail); - fmail->super.expunge(_mail); + fmail->module_ctx.super.expunge(_mail); +} + +static int fts_score_cmp(const void *key, const void *data) +{ + const uint32_t *uid = key; + const struct fts_score_map *score = data; + + return *uid < score->uid ? -1 : + (*uid > score->uid ? 1 : 0); +} + +static int fts_mail_get_special(struct mail *_mail, enum mail_fetch_field field, + const char **value_r) +{ + struct mail_private *mail = (struct mail_private *)_mail; + struct fts_mail *fmail = FTS_MAIL_CONTEXT(mail); + struct fts_transaction_context *ft = FTS_CONTEXT(_mail->transaction); + const struct fts_score_map *scores; + unsigned int count; + + if (field != MAIL_FETCH_SEARCH_SCORE || ft->score_map == NULL || + !array_is_created(ft->score_map)) + scores = NULL; + else { + scores = array_get(ft->score_map, &count); + scores = bsearch(&_mail->uid, scores, count, sizeof(*scores), + fts_score_cmp); + } + if (scores != NULL) { + i_assert(scores->uid == _mail->uid); + i_snprintf(fmail->score, sizeof(fmail->score), + "%f", scores->score); + *value_r = fmail->score; + return 0; + } + + return fmail->module_ctx.super.get_special(_mail, field, value_r); } static struct mail * @@ -524,7 +576,7 @@ struct mailbox_header_lookup_ctx *wanted_headers) { struct fts_mailbox *fbox = FTS_CONTEXT(t->box); - union mail_module_context *fmail; + struct fts_mail *fmail; struct mail *_mail; struct mail_private *mail; @@ -533,11 +585,12 @@ if (fbox->backend_substr != NULL || fbox->backend_fast != NULL) { mail = (struct mail_private *)_mail; - fmail = p_new(mail->pool, union mail_module_context, 1); - fmail->super = mail->v; + fmail = p_new(mail->pool, struct fts_mail, 1); + fmail->module_ctx.super = mail->v; mail->v.expunge = fts_mail_expunge; - MODULE_CONTEXT_SET_SELF(mail, fts_mail_module, fmail); + mail->v.get_special = fts_mail_get_special; + MODULE_CONTEXT_SET(mail, fts_mail_module, fmail); } return _mail; }
--- a/src/plugins/fts/fts-storage.h Sun Jul 13 19:08:13 2008 +0300 +++ b/src/plugins/fts/fts-storage.h Sun Jul 13 19:09:55 2008 +0300 @@ -20,6 +20,7 @@ struct fts_backend_lookup_context *lookup_ctx_substr, *lookup_ctx_fast; ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs; + ARRAY_TYPE(fts_score_map) score_map; unsigned int definite_idx, maybe_idx; struct fts_backend *build_backend;