Mercurial > dovecot > original-hg > dovecot-1.2
changeset 8492:ee1ecdda8a6b HEAD
fts-solr: Keep last_uid field updated containing the mailbox's last added UID.
Use the last_uid document for finding the last added UID instead of creating
a query with rows=1&sort=uid. It should be faster this way.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 29 Nov 2008 19:03:04 +0200 |
parents | f48e7614677a |
children | 149c6ccc3df4 |
files | doc/solr-schema.xml src/plugins/fts-solr/fts-backend-solr.c |
diffstat | 2 files changed, 99 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/solr-schema.xml Mon Nov 24 00:53:36 2008 +0200 +++ b/doc/solr-schema.xml Sat Nov 29 19:03:04 2008 +0200 @@ -13,6 +13,7 @@ <fieldType name="long" class="solr.LongField" omitNorms="true"/> <fieldType name="slong" class="solr.SortableLongField" omitNorms="true"/> <fieldType name="float" class="solr.FloatField" omitNorms="true"/> + <fieldType name="boolean" class="solr.BoolField" omitNorms="true"/> <fieldType name="text" class="solr.TextField" positionIncrementGap="100"> <analyzer type="index"> @@ -42,6 +43,7 @@ <field name="uidv" type="long" indexed="true" stored="true" required="true" /> <field name="box" type="string" indexed="true" stored="true" required="true" /> <field name="user" type="string" indexed="true" stored="true" required="true" /> + <field name="last_uid" type="boolean" indexed="true" stored="false" /> <field name="hdr" type="text" indexed="true" stored="false" /> <field name="body" type="text" indexed="true" stored="false" /> <field name="any" type="text" indexed="true" stored="false" multiValued="true" />
--- a/src/plugins/fts-solr/fts-backend-solr.c Mon Nov 24 00:53:36 2008 +0200 +++ b/src/plugins/fts-solr/fts-backend-solr.c Sat Nov 29 19:03:04 2008 +0200 @@ -79,8 +79,8 @@ i_free(backend); } -static int fts_backend_solr_get_last_uid(struct fts_backend *backend, - uint32_t *last_uid_r) +static int fts_backend_solr_get_last_uid_fallback(struct fts_backend *backend, + uint32_t *last_uid_r) { struct mailbox_status status; ARRAY_TYPE(seq_range) uids; @@ -115,6 +115,44 @@ return 0; } +static int fts_backend_solr_get_last_uid(struct fts_backend *backend, + uint32_t *last_uid_r) +{ + struct mailbox_status status; + ARRAY_TYPE(seq_range) uids; + const struct seq_range *uidvals; + unsigned int count; + string_t *str; + + str = t_str_new(256); + str_append(str, "fl=uid&rows=1&q=last_uid:TRUE%20"); + + mailbox_get_status(backend->box, STATUS_UIDVALIDITY, &status); + str_printfa(str, "uidv:%u%%20box:", status.uidvalidity); + solr_quote_str(str, backend->box->name); + str_append(str, "%20user:"); + solr_quote_str(str, backend->box->storage->ns->user->username); + + t_array_init(&uids, 1); + if (solr_connection_select(solr_conn, str_c(str), + NULL, NULL, &uids, NULL) < 0) + return -1; + + uidvals = array_get(&uids, &count); + if (count == 0) { + /* either nothing is indexed or we're converting from an + older database format without the last_uid fields */ + return fts_backend_solr_get_last_uid_fallback(backend, + last_uid_r); + } else if (count == 1 && uidvals[0].seq1 == uidvals[0].seq2) { + *last_uid_r = uidvals[0].seq1; + } else { + i_error("fts_solr: Last UID lookup returned multiple rows"); + return -1; + } + return 0; +} + static int fts_backend_solr_build_init(struct fts_backend *backend, uint32_t *last_uid_r, struct fts_backend_build_context **ctx_r) @@ -136,6 +174,24 @@ return 0; } +static void +fts_backend_solr_add_doc_prefix(struct solr_fts_backend_build_context *ctx, + uint32_t uid) +{ + struct mailbox *box = ctx->ctx.backend->box; + + str_printfa(ctx->cmd, "<doc>" + "<field name=\"uid\">%u</field>" + "<field name=\"uidv\">%u</field>", + uid, ctx->uid_validity); + + str_append(ctx->cmd, "<field name=\"box\">"); + xml_encode(ctx->cmd, box->name); + str_append(ctx->cmd, "</field><field name=\"user\">"); + xml_encode(ctx->cmd, box->storage->ns->user->username); + str_append(ctx->cmd, "</field>"); +} + static int fts_backend_solr_build_more(struct fts_backend_build_context *_ctx, uint32_t uid, const unsigned char *data, @@ -155,17 +211,8 @@ str_append(cmd, "</field></doc>"); ctx->prev_uid = uid; - str_printfa(cmd, "<doc>" - "<field name=\"uid\">%u</field>" - "<field name=\"uidv\">%u</field>", - uid, ctx->uid_validity); - - str_append(cmd, "<field name=\"box\">"); - xml_encode(cmd, box->name); - str_append(cmd, "</field><field name=\"user\">"); - xml_encode(cmd, box->storage->ns->user->username); - - str_printfa(cmd, "</field><field name=\"id\">%u/%u/", + fts_backend_solr_add_doc_prefix(ctx, uid); + str_printfa(cmd, "<field name=\"id\">%u/%u/", uid, ctx->uid_validity); xml_encode(cmd, box->storage->ns->user->username); str_append_c(cmd, '/'); @@ -194,24 +241,48 @@ } static int +fts_backed_solr_build_commit(struct solr_fts_backend_build_context *ctx) +{ + struct mailbox *box = ctx->ctx.backend->box; + int ret; + + if (ctx->prev_uid == 0) + return 0; + + str_append(ctx->cmd, "</field></doc>"); + + /* Update the mailbox's last_uid field, replacing the existing + document. Note that since there is no locking, it's possible that + if another session is indexing at the same time, the last_uid value + may shrink. This doesn't really matter, we'll simply do more work + in future by reindexing some messages. */ + fts_backend_solr_add_doc_prefix(ctx, ctx->prev_uid); + str_printfa(ctx->cmd, "<field name=\"last_uid\">TRUE</field>" + "<field name=\"id\">L/%u/", ctx->uid_validity); + xml_encode(ctx->cmd, box->storage->ns->user->username); + str_append_c(ctx->cmd, '/'); + xml_encode(ctx->cmd, box->name); + str_append(ctx->cmd, "</field></doc></add>"); + + solr_connection_post_more(ctx->post, str_data(ctx->cmd), + str_len(ctx->cmd)); + ret = solr_connection_post_end(ctx->post); + /* commit and wait until the documents we just indexed are + visible to the following search */ + if (solr_connection_post(solr_conn, "<commit waitFlush=\"false\" " + "waitSearcher=\"true\"/>") < 0) + ret = -1; + return ret; +} + +static int fts_backend_solr_build_deinit(struct fts_backend_build_context *_ctx) { struct solr_fts_backend_build_context *ctx = (struct solr_fts_backend_build_context *)_ctx; - int ret = 0; + int ret; - if (ctx->prev_uid != 0) { - str_append(ctx->cmd, "</field></doc></add>"); - solr_connection_post_more(ctx->post, str_data(ctx->cmd), - str_len(ctx->cmd)); - ret = solr_connection_post_end(ctx->post); - /* commit and wait until the documents we just indexed are - visible to the following search */ - if (solr_connection_post(solr_conn, - "<commit waitFlush=\"false\" " - "waitSearcher=\"true\"/>") < 0) - ret = -1; - } + ret = fts_backed_solr_build_commit(ctx); str_free(&ctx->cmd); i_free(ctx); return ret;