Mercurial > dovecot > core-2.2
changeset 13224:a50ac3a840a9
fts-lucene: Added "doveadm dump" support for lucene index.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Fri, 12 Aug 2011 17:27:05 +0300 |
parents | 271bbac3047d |
children | 046090da4aaf |
files | src/plugins/fts-lucene/Makefile.am src/plugins/fts-lucene/doveadm-fts-lucene.c src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h |
diffstat | 4 files changed, 188 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-lucene/Makefile.am Fri Aug 12 17:40:55 2011 +0300 +++ b/src/plugins/fts-lucene/Makefile.am Fri Aug 12 17:27:05 2011 +0300 @@ -1,13 +1,16 @@ +doveadm_moduledir = $(moduledir)/doveadm + AM_CPPFLAGS = \ -I$(top_srcdir)/src/lib \ -I$(top_srcdir)/src/lib-mail \ -I$(top_srcdir)/src/lib-index \ -I$(top_srcdir)/src/lib-storage \ -I$(top_srcdir)/src/plugins/fts \ - -I/usr/lib + -I$(top_srcdir)/src/doveadm NOPLUGIN_LDFLAGS = lib21_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version +lib20_doveadm_fts_lucene_plugin_la_LDFLAGS = -module -avoid-version module_LTLIBRARIES = \ lib21_fts_lucene_plugin.la @@ -38,3 +41,9 @@ textcat.conf endif EXTRA_DIST = textcat.conf + +doveadm_module_LTLIBRARIES = \ + lib20_doveadm_fts_lucene_plugin.la + +lib20_doveadm_fts_lucene_plugin_la_SOURCES = \ + doveadm-fts-lucene.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c	Fri Aug 12 17:27:05 2011 +0300
@@ -0,0 +1,80 @@
+/* Copyright (c) 2011 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "hex-binary.h"
+#include "doveadm-dump.h"
+#include "doveadm-fts.h"
+#include "lucene-wrapper.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+const char *doveadm_fts_lucene_plugin_version = DOVECOT_VERSION;
+
+void doveadm_fts_lucene_plugin_init(struct module *module);
+void doveadm_fts_lucene_plugin_deinit(void);
+
+/* "doveadm dump" handler for Lucene indexes. Opens the index at argv[1] and
+   prints its records as "<mailbox GUID in hex>: uid,uid,...", starting a new
+   output line whenever the mailbox GUID differs from the previous record's. */
+static void cmd_dump_fts_lucene(int argc ATTR_UNUSED, char *argv[])
+{
+	struct lucene_index *index;
+	struct lucene_index_iter *iter;
+	mail_guid_128_t prev_guid;
+	const struct lucene_index_record *rec;
+	bool first = TRUE;
+
+	memset(&prev_guid, 0, sizeof(prev_guid));
+	index = lucene_index_init(argv[1], NULL, NULL);
+	iter = lucene_index_iter_init(index);
+	while ((rec = lucene_index_iter_next(iter)) != NULL) {
+		/* Always open a group for the first record, so that its GUID
+		   header is printed even when the GUID is all zeros and thus
+		   equal to the zero-initialized prev_guid. */
+		if (first || memcmp(prev_guid, rec->mailbox_guid,
+				    sizeof(prev_guid)) != 0) {
+			if (first)
+				first = FALSE;
+			else
+				printf("\n");
+			memcpy(prev_guid, rec->mailbox_guid, sizeof(prev_guid));
+			printf("%s: ", binary_to_hex(prev_guid, sizeof(prev_guid)));
+		}
+		printf("%u,", rec->uid);
+	}
+	printf("\n");
+	if (lucene_index_iter_deinit(&iter) < 0)
+		i_error("Lucene index iteration failed");
+	lucene_index_deinit(index);
+}
+
+/* Returns TRUE if path looks like a Lucene index directory, i.e. stat()
+   succeeds for "<path>/segments.gen". */
+static bool test_dump_fts_lucene(const char *path)
+{
+	struct stat st;
+
+	/* Check inside the directory that was actually given. Keeping only
+	   the last path component would make stat() resolve relative to the
+	   current working directory and miss indexes located elsewhere. */
+	path = t_strconcat(path, "/segments.gen", NULL);
+	return stat(path, &st) == 0;
+}
+
+struct doveadm_cmd_dump doveadm_cmd_dump_fts_lucene = {
+	"fts-lucene",
+	test_dump_fts_lucene,
+	cmd_dump_fts_lucene
+};
+
+/* Plugin init hook: registers the "fts-lucene" dump handler with doveadm. */
+void doveadm_fts_lucene_plugin_init(struct module *module ATTR_UNUSED)
+{
+	doveadm_dump_register(&doveadm_cmd_dump_fts_lucene);
+}
+
+/* Plugin deinit hook: intentionally empty. */
+void doveadm_fts_lucene_plugin_deinit(void)
+{
+}
--- a/src/plugins/fts-lucene/lucene-wrapper.cc Fri Aug 12 17:40:55 2011 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.cc Fri Aug 12 17:27:05 2011 +0300 @@ -213,10 +213,9 @@ } static int -lucene_doc_get_uid(struct lucene_index *index, Document *doc, - const TCHAR *field_name, uint32_t *uid_r) +lucene_doc_get_uid(struct lucene_index *index, Document *doc, uint32_t *uid_r) { - Field *field = doc->getField(field_name); + Field *field = doc->getField(_T("uid")); const TCHAR *uid = field == NULL ? NULL : field->stringValue(); if (uid == NULL) { i_error("lucene: Corrupted FTS index %s: No UID for document", @@ -253,7 +252,7 @@ uint32_t uid; if (lucene_doc_get_uid(index, &hits->doc(i), - _T("uid"), &uid) < 0) { + &uid) < 0) { ret = -1; break; } @@ -516,25 +515,33 @@ } static int -rescan_open_mailbox(struct rescan_context *ctx, Document *doc) +fts_lucene_get_mailbox_guid(struct lucene_index *index, Document *doc, + mail_guid_128_t *guid_r) { - int ret; - Field *field = doc->getField(_T("box")); const TCHAR *box_guid = field == NULL ? 
NULL : field->stringValue(); if (box_guid == NULL) { i_error("lucene: Corrupted FTS index %s: No mailbox for document", - ctx->index->path); - return 0; + index->path); + return -1; } - mail_guid_128_t guid; - if (wcharguid_to_guid(&guid, box_guid) < 0) { + if (wcharguid_to_guid(guid_r, box_guid) < 0) { i_error("lucene: Corrupted FTS index %s: " - "box field not in expected format", - ctx->index->path); + "box field not in expected format", index->path); + return -1; + } + return 0; +} + +static int +rescan_open_mailbox(struct rescan_context *ctx, Document *doc) +{ + mail_guid_128_t guid; + int ret; + + if (fts_lucene_get_mailbox_guid(ctx->index, doc, &guid) < 0) return 0; - } if (memcmp(guid, ctx->box_guid, sizeof(guid)) == 0) { /* same as last one */ @@ -587,7 +594,7 @@ { uint32_t lucene_uid, idx_uid; - if (lucene_doc_get_uid(ctx->index, doc, _T("uid"), &lucene_uid) < 0) + if (lucene_doc_get_uid(ctx->index, doc, &lucene_uid) < 0) return 0; if (seq_range_array_iter_nth(&ctx->uids_iter, ctx->uids_iter_n, @@ -917,7 +924,7 @@ uint32_t uid; if (lucene_doc_get_uid(index, &hits->doc(i), - _T("uid"), &uid) < 0) { + &uid) < 0) { ret = -1; break; } @@ -1028,7 +1035,7 @@ } if (lucene_doc_get_uid(index, &hits->doc(i), - _T("uid"), &uid) < 0) { + &uid) < 0) { ret = -1; break; } @@ -1049,6 +1056,7 @@ return -1; } } + int lucene_index_lookup_multi(struct lucene_index *index, struct hash_table *guids, struct mail_search_arg *args, bool and_args, @@ -1076,3 +1084,70 @@ } return 0; } + +struct lucene_index_iter { + struct lucene_index *index; + struct lucene_index_record rec; + + Hits *hits; + size_t i; + bool failed; +}; + +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index *index) +{ + static const TCHAR *sort_fields[] = { _T("box"), _T("uid"), NULL }; + struct lucene_index_iter *iter; + int ret; + + iter = i_new(struct lucene_index_iter, 1); + iter->index = index; + if ((ret = lucene_index_open_search(index)) <= 0) { + if (ret < 0) + iter->failed = true; + 
return iter; + } + + Term term(_T("box"), _T("*")); + WildcardQuery query(&term); + Sort sort(sort_fields); + + try { + iter->hits = index->searcher->search(&query, &sort); + } catch (CLuceneError &err) { + lucene_handle_error(index, err, "rescan search"); + iter->failed = true; + } + return iter; +} + +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter) +{ + if (iter->hits == NULL) + return NULL; + if (iter->i == iter->hits->length()) + return NULL; + + Document *doc = &iter->hits->doc(iter->i); + iter->i++; + + memset(&iter->rec, 0, sizeof(iter->rec)); + (void)fts_lucene_get_mailbox_guid(iter->index, doc, + &iter->rec.mailbox_guid); + (void)lucene_doc_get_uid(iter->index, doc, &iter->rec.uid); + return &iter->rec; +} + +int lucene_index_iter_deinit(struct lucene_index_iter **_iter) +{ + struct lucene_index_iter *iter = *_iter; + int ret = iter->failed ? -1 : 0; + + *_iter = NULL; + if (iter->hits != NULL) + _CLDELETE(iter->hits); + i_free(iter); + return ret; +}
--- a/src/plugins/fts-lucene/lucene-wrapper.h Fri Aug 12 17:40:55 2011 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.h Fri Aug 12 17:27:05 2011 +0300 @@ -4,8 +4,17 @@ #include "fts-api-private.h" #include "mail-types.h" +struct hash_table; +struct mailbox_list; +struct fts_expunge_log; + #define MAILBOX_GUID_HEX_LENGTH (MAIL_GUID_128_SIZE*2) +struct lucene_index_record { + mail_guid_128_t mailbox_guid; + uint32_t uid; +}; + struct lucene_index *lucene_index_init(const char *path, const char *textcat_dir, const char *textcat_conf); @@ -37,6 +46,12 @@ struct mail_search_arg *args, bool and_args, struct fts_multi_result *result); +struct lucene_index_iter * +lucene_index_iter_init(struct lucene_index *index); +const struct lucene_index_record * +lucene_index_iter_next(struct lucene_index_iter *iter); +int lucene_index_iter_deinit(struct lucene_index_iter **iter); + /* internal: */ void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize, wchar_t *dest, size_t destsize);