Mercurial > dovecot > original-hg > dovecot-1.2
changeset 4710:d0e37ed08bdb HEAD
Create only a single index in INBOX's index dir. Also made several fixes.
However, it is still a bit buggy.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 25 Oct 2006 02:49:13 +0300 |
parents | d2693511dd33 |
children | 12584631dc42 |
files | src/plugins/fts-lucene/fts-backend-lucene.c src/plugins/fts-lucene/fts-lucene-plugin.c src/plugins/fts-lucene/fts-lucene-plugin.h src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h |
diffstat | 5 files changed, 228 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-lucene/fts-backend-lucene.c Wed Oct 25 02:46:14 2006 +0300 +++ b/src/plugins/fts-lucene/fts-backend-lucene.c Wed Oct 25 02:49:13 2006 +0300 @@ -1,23 +1,64 @@ /* Copyright (C) 2006 Timo Sirainen */ #include "lib.h" +#include "array.h" +#include "mail-storage-private.h" #include "lucene-wrapper.h" #include "fts-lucene-plugin.h" +#define LUCENE_INDEX_DIR_NAME "lucene-indexes" + +struct lucene_mail_storage { + struct lucene_index *index; + struct mailbox *selected_box; + int refcount; +}; + struct lucene_fts_backend { struct fts_backend backend; - struct lucene_index *index; + struct lucene_mail_storage *lstorage; + struct mailbox *box; uint32_t last_uid; }; -static struct fts_backend *fts_backend_lucene_init(const char *path) +static void fts_backend_select(struct lucene_fts_backend *backend) +{ + if (backend->lstorage->selected_box != backend->box) { + lucene_index_select_mailbox(backend->lstorage->index, + mailbox_get_name(backend->box)); + backend->lstorage->selected_box = backend->box; + } +} + +static struct fts_backend *fts_backend_lucene_init(struct mailbox *box) { + struct lucene_mail_storage *lstorage; struct lucene_fts_backend *backend; + const char *path; + + lstorage = LUCENE_CONTEXT(box->storage); + if (lstorage == NULL) { + path = mail_storage_get_mailbox_index_dir(box->storage, + "INBOX"); + if (path == NULL) { + /* in-memory indexes */ + return NULL; + } + + path = t_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL); + + lstorage = i_new(struct lucene_mail_storage, 1); + lstorage->index = lucene_index_init(path); + array_idx_set(&box->storage->module_contexts, + fts_lucene_storage_module_id, &lstorage); + } + lstorage->refcount++; backend = i_new(struct lucene_fts_backend, 1); backend->backend = fts_backend_lucene; - backend->index = lucene_index_init(path); + backend->lstorage = lstorage; + backend->box = box; return &backend->backend; } @@ -26,7 +67,12 @@ struct lucene_fts_backend *backend = (struct lucene_fts_backend *)_backend; 
- lucene_index_deinit(backend->index); + if (--backend->lstorage->refcount == 0) { + array_idx_clear(&backend->box->storage->module_contexts, + fts_lucene_storage_module_id); + lucene_index_deinit(backend->lstorage->index); + i_free(backend->lstorage); + } i_free(backend); } @@ -37,9 +83,12 @@ (struct lucene_fts_backend *)_backend; struct fts_backend_build_context *ctx; + fts_backend_select(backend); + ctx = i_new(struct fts_backend_build_context, 1); ctx->backend = _backend; - if (lucene_index_build_init(backend->index, &backend->last_uid) < 0) + if (lucene_index_build_init(backend->lstorage->index, + &backend->last_uid) < 0) ctx->failed = TRUE; *last_uid_r = backend->last_uid; @@ -60,7 +109,9 @@ i_assert(uid >= backend->last_uid); backend->last_uid = uid; - return lucene_index_build_more(backend->index, uid, data, size); + i_assert(backend->lstorage->selected_box == backend->box); + return lucene_index_build_more(backend->lstorage->index, + uid, data, size); } static int @@ -70,7 +121,8 @@ (struct lucene_fts_backend *)ctx->backend; int ret = ctx->failed ? -1 : 0; - lucene_index_build_deinit(backend->index); + i_assert(backend->lstorage->selected_box == backend->box); + lucene_index_build_deinit(backend->lstorage->index); i_free(ctx); return ret; } @@ -82,7 +134,8 @@ struct lucene_fts_backend *backend = (struct lucene_fts_backend *)_backend; - return lucene_index_lookup(backend->index, key, result); + fts_backend_select(backend); + return lucene_index_lookup(backend->lstorage->index, key, result); } static int @@ -92,7 +145,8 @@ struct lucene_fts_backend *backend = (struct lucene_fts_backend *)_backend; - return lucene_index_filter(backend->index, key, result); + fts_backend_select(backend); + return lucene_index_filter(backend->lstorage->index, key, result); } struct fts_backend fts_backend_lucene = {
--- a/src/plugins/fts-lucene/fts-lucene-plugin.c Wed Oct 25 02:46:14 2006 +0300 +++ b/src/plugins/fts-lucene/fts-lucene-plugin.c Wed Oct 25 02:49:13 2006 +0300 @@ -1,10 +1,14 @@ /* Copyright (C) 2006 Timo Sirainen */ #include "lib.h" +#include "mail-storage-private.h" #include "fts-lucene-plugin.h" +unsigned int fts_lucene_storage_module_id; + void fts_lucene_plugin_init(void) { + fts_lucene_storage_module_id = mail_storage_module_id++; fts_backend_register(&fts_backend_lucene); }
--- a/src/plugins/fts-lucene/fts-lucene-plugin.h Wed Oct 25 02:46:14 2006 +0300 +++ b/src/plugins/fts-lucene/fts-lucene-plugin.h Wed Oct 25 02:49:13 2006 +0300 @@ -3,7 +3,12 @@ #include "fts-api-private.h" +#define LUCENE_CONTEXT(obj) \ + *((void **)array_idx_modifiable(&(obj)->module_contexts, \ + fts_lucene_storage_module_id)) + extern struct fts_backend fts_backend_lucene; +extern unsigned int fts_lucene_storage_module_id; void fts_lucene_plugin_init(void); void fts_lucene_plugin_deinit(void);
--- a/src/plugins/fts-lucene/lucene-wrapper.cc Wed Oct 25 02:46:14 2006 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.cc Wed Oct 25 02:49:13 2006 +0300 @@ -14,17 +14,21 @@ using namespace lucene::index; using namespace lucene::search; using namespace lucene::queryParser; +using namespace lucene::analysis; struct lucene_index { char *path; + char *mailbox_name; + TCHAR *tmailbox_name; IndexReader *reader; IndexWriter *writer; IndexSearcher *searcher; - lucene::analysis::standard::StandardAnalyzer *analyzer; + Analyzer *analyzer; Document *doc; uint32_t prev_uid, last_uid; + int32_t last_uid_doc_id; }; static const uint8_t utf8_skip_table[256] = { @@ -38,6 +42,39 @@ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 }; +class RawTokenStream : public TokenStream { + CL_NS(util)::Reader *reader; + +public: + RawTokenStream(CL_NS(util)::Reader *reader) { + this->reader = reader; + }; + + bool next(Token *token) { + const TCHAR *data; + + int32_t len = this->reader->read(data); + if (len <= 0) + return false; + + token->set(data, 0, len); + return true; + } + + void close() { } +}; + +class DovecotAnalyzer : public standard::StandardAnalyzer { +public: + TokenStream *tokenStream(const TCHAR *fieldName, + CL_NS(util)::Reader *reader) { + if (fieldName != 0 && wcscmp(fieldName, L"contents") != 0) + return _CLNEW RawTokenStream(reader); + return standard::StandardAnalyzer:: + tokenStream(fieldName, reader); + } +}; + struct lucene_index *lucene_index_init(const char *path) { struct lucene_index *index; @@ -58,10 +95,25 @@ void lucene_index_deinit(struct lucene_index *index) { lucene_index_close(index); - i_free(index->path); + i_free(index->mailbox_name); + i_free(index->tmailbox_name); i_free(index); } +int lucene_index_select_mailbox(struct lucene_index *index, + const char *mailbox_name) +{ + size_t len; + + i_free(index->mailbox_name); + i_free(index->tmailbox_name); + + len = strlen(mailbox_name); + index->mailbox_name = i_strdup(mailbox_name); + 
index->tmailbox_name = i_new(TCHAR, len + 1); + STRCPY_AtoT(index->tmailbox_name, mailbox_name, len); +} + static int lucene_index_open(struct lucene_index *index) { if (index->reader != NULL) @@ -89,10 +141,8 @@ if ((ret = lucene_index_open(index)) <= 0) return ret; - if (index->analyzer == NULL) { - index->analyzer = - _CLNEW lucene::analysis::standard::StandardAnalyzer(); - } + if (index->analyzer == NULL) + index->analyzer = _CLNEW DovecotAnalyzer(); index->searcher = _CLNEW IndexSearcher(index->reader); return 1; @@ -119,35 +169,73 @@ } static int -lucene_index_get_last_uid(struct lucene_index *index, uint32_t *last_uid_r) +lucene_index_get_last_uid(struct lucene_index *index) { - int32_t max_docnum = index->reader->maxDoc(); + int ret = 0; + + if (lucene_index_open_search(index) <= 0) + return -1; + + Term mailbox_term(_T("box"), index->tmailbox_name); + Term last_uid_term(_T("last_uid"), _T("1")); + TermQuery mailbox_query(&mailbox_term); + TermQuery last_uid_query(&last_uid_term); + + BooleanQuery query; + query.add(&mailbox_query, true, false); + query.add(&last_uid_query, true, false); + + index->last_uid = 0; + index->last_uid_doc_id = -1; + try { + Hits *hits = index->searcher->search(&query); - if (max_docnum == 0) { - *last_uid_r = 0; - return 0; + if (hits->length() > 1) { + i_error("lucene: last_uid search for mailbox %s " + "returned multiple hits", index->mailbox_name); + } + for (int32_t i = 0; i < hits->length(); i++) { + uint32_t uid; + + if (lucene_doc_get_uid(index, &hits->doc(i), + &uid) < 0) { + ret = -1; + break; + } + + int32_t del_id = -1; + if (uid > index->last_uid) { + if (index->last_uid_doc_id >= 0) + del_id = index->last_uid_doc_id; + index->last_uid = uid; + index->last_uid_doc_id = hits->id(i); + } else { + del_id = hits->id(i); + } + if (del_id >= 0) + index->reader->deleteDocument(del_id); + } + _CLDELETE(hits); + } catch (CLuceneError &err) { + i_error("lucene: last_uid search failed: %s", err.what()); + ret = -1; } - - 
Document *doc = index->reader->document(max_docnum-1); - if (lucene_doc_get_uid(index, doc, last_uid_r) < 0) { - _CLDELETE(doc); - return -1; - } - _CLDELETE(doc); - return 0; + return ret; } int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r) { uint32_t last_uid = 0; + i_assert(index->mailbox_name != NULL); + if (lucene_index_open(index) < 0) return -1; if (index->reader == NULL) index->last_uid = 0; else { - if (lucene_index_get_last_uid(index, &index->last_uid) < 0) + if (lucene_index_get_last_uid(index) < 0) return -1; } *last_uid_r = index->last_uid; @@ -156,7 +244,7 @@ return 0; bool exists = IndexReader::indexExists(index->path); - index->analyzer = _CLNEW lucene::analysis::standard::StandardAnalyzer(); + index->analyzer = _CLNEW DovecotAnalyzer(); try { index->writer = _CLNEW IndexWriter(index->path, index->analyzer, !exists); @@ -230,6 +318,7 @@ i_snprintf(id, sizeof(id), "%u", uid); STRCPY_AtoT(tid, id, MAX_INT_STRLEN); index->doc->add(*Field::Text(_T("uid"), tid)); + index->doc->add(*Field::Text(_T("box"), index->tmailbox_name)); } index->doc->add(*Field::Text(_T("contents"), dest)); @@ -237,11 +326,41 @@ return 0; } +static int lucene_index_update_last_uid(struct lucene_index *index) +{ + Document doc; + char id[MAX_INT_STRLEN]; + TCHAR tid[MAX_INT_STRLEN]; + + i_snprintf(id, sizeof(id), "%u", index->last_uid); + STRCPY_AtoT(tid, id, MAX_INT_STRLEN); + + doc.add(*Field::Text(_T("last_uid"), _T("1"))); + doc.add(*Field::Text(_T("uid"), tid)); + doc.add(*Field::Text(_T("box"), index->tmailbox_name)); + + try { + if (index->last_uid_doc_id >= 0) { + index->reader->deleteDocument(index->last_uid_doc_id); + index->last_uid_doc_id = -1; + } + index->writer->addDocument(&doc); + return 0; + } catch (CLuceneError &err) { + i_error("lucene: IndexWriter::addDocument(%s) failed: %s", + index->path, err.what()); + return -1; + } +} + int lucene_index_build_deinit(struct lucene_index *index) { int ret = 0; + if (index->prev_uid > 
index->last_uid) + index->last_uid = index->prev_uid; index->prev_uid = 0; + if (index->writer == NULL) { lucene_index_close(index); return -1; @@ -249,6 +368,8 @@ if (lucene_index_build_flush(index) < 0) ret = -1; + if (lucene_index_update_last_uid(index) < 0) + ret = -1; try { index->writer->optimize(); @@ -287,10 +408,10 @@ lucene_utf8towcs(tkey, quoted_key, len + 1); t_pop(); - Query *query = NULL; + Query *content_query = NULL; try { - query = QueryParser::parse(tkey, _T("contents"), - index->analyzer); + content_query = QueryParser::parse(tkey, _T("contents"), + index->analyzer); } catch (CLuceneError &err) { if (getenv("DEBUG") != NULL) { i_info("lucene: QueryParser::parse(%s) failed: %s", @@ -300,8 +421,14 @@ return -1; } + BooleanQuery query; + Term mailbox_term(_T("box"), index->tmailbox_name); + TermQuery mailbox_query(&mailbox_term); + query.add(content_query, true, false); + query.add(&mailbox_query, true, false); + try { - Hits *hits = index->searcher->search(query); + Hits *hits = index->searcher->search(&query); for (int32_t i = 0; i < hits->length(); i++) { uint32_t uid; @@ -321,7 +448,7 @@ ret = -1; } - _CLDELETE(query); + _CLDELETE(content_query); lucene_index_close(index); return ret; }
--- a/src/plugins/fts-lucene/lucene-wrapper.h Wed Oct 25 02:46:14 2006 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.h Wed Oct 25 02:49:13 2006 +0300 @@ -6,6 +6,9 @@ struct lucene_index *lucene_index_init(const char *path); void lucene_index_deinit(struct lucene_index *index); +int lucene_index_select_mailbox(struct lucene_index *index, + const char *mailbox_name); + int lucene_index_build_init(struct lucene_index *index, uint32_t *last_uid_r); int lucene_index_build_more(struct lucene_index *index, uint32_t uid, const unsigned char *data, size_t size);