Mercurial > dovecot > core-2.2
changeset 13282:c5bb5db9f541
fts-lucene: Added default_language setting and separated stemmer/textcat support.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 24 Aug 2011 21:07:04 +0300 |
parents | f0e415c46490 |
children | 12b70c1819a9 |
files | configure.in src/plugins/fts-lucene/Makefile.am src/plugins/fts-lucene/doveadm-fts-lucene.c src/plugins/fts-lucene/fts-backend-lucene.c src/plugins/fts-lucene/fts-lucene-plugin.c src/plugins/fts-lucene/fts-lucene-plugin.h src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h |
diffstat | 8 files changed, 58 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/configure.in Tue Aug 23 04:52:55 2011 +0300 +++ b/configure.in Wed Aug 24 21:07:04 2011 +0300 @@ -2609,13 +2609,16 @@ AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes") if test "$want_lucene" = "yes"; then - AC_CHECK_LIB(textcat, special_textcat_Init, [ - AC_CHECK_LIB(stemmer, sb_stemmer_new, [ + AC_CHECK_LIB(stemmer, sb_stemmer_new, [ + have_lucene_stemmer=yes + AC_DEFINE(HAVE_LUCENE_STEMMER,, Define if you want stemming support for CLucene) + AC_CHECK_LIB(textcat, special_textcat_Init, [ have_lucene_textcat=yes - AC_DEFINE(HAVE_LUCENE_TEXTCAT,, Define if you want textcat and stemming support for CLucene) + AC_DEFINE(HAVE_LUCENE_TEXTCAT,, Define if you want textcat support for CLucene) ]) ]) fi +AM_CONDITIONAL(BUILD_LUCENE_STEMMER, test "$have_lucene_stemmer" = "yes") AM_CONDITIONAL(BUILD_LUCENE_TEXTCAT, test "$have_lucene_textcat" = "yes") dnl ** @@ -2738,6 +2741,7 @@ src/plugins/quota/Makefile src/plugins/imap-quota/Makefile src/plugins/snarf/Makefile +src/plugins/stats/Makefile src/plugins/trash/Makefile src/plugins/virtual/Makefile src/plugins/zlib/Makefile
--- a/src/plugins/fts-lucene/Makefile.am Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/Makefile.am Wed Aug 24 21:07:04 2011 +0300 @@ -15,13 +15,16 @@ module_LTLIBRARIES = \ lib21_fts_lucene_plugin.la +if BUILD_LUCENE_STEMMER +STEMMER_LIBS = -lstemmer +SHOWBALL_SOURCES = Snowball.cc +endif if BUILD_LUCENE_TEXTCAT -TEXTCAT_LIBS = -lstemmer -ltextcat -SHOWBALL_SOURCES = Snowball.cc +TEXTCAT_LIBS = -ltextcat endif lib21_fts_lucene_plugin_la_LIBADD = \ - -lclucene-shared -lclucene-core $(TEXTCAT_LIBS) + -lclucene-shared -lclucene-core $(TEXTCAT_LIBS) $(STEMMER_LIBS) lib21_fts_lucene_plugin_la_SOURCES = \ fts-lucene-plugin.c \
--- a/src/plugins/fts-lucene/doveadm-fts-lucene.c Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c Wed Aug 24 21:07:04 2011 +0300 @@ -23,7 +23,7 @@ bool first = TRUE; memset(&prev_guid, 0, sizeof(prev_guid)); - index = lucene_index_init(argv[1], NULL, NULL, NULL); + index = lucene_index_init(argv[1], NULL, NULL); iter = lucene_index_iter_init(index); while ((rec = lucene_index_iter_next(iter)) != NULL) { if (memcmp(prev_guid, rec->mailbox_guid,
--- a/src/plugins/fts-lucene/fts-backend-lucene.c Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/fts-backend-lucene.c Wed Aug 24 21:07:04 2011 +0300 @@ -137,12 +137,11 @@ if (fuser != NULL) { backend->index = lucene_index_init(backend->dir_path, _backend->ns->list, - fuser->set.textcat_dir, - fuser->set.textcat_conf); + &fuser->set); } else { backend->index = lucene_index_init(backend->dir_path, _backend->ns->list, - NULL, NULL); + NULL); } path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL);
--- a/src/plugins/fts-lucene/fts-lucene-plugin.c Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/fts-lucene-plugin.c Wed Aug 24 21:07:04 2011 +0300 @@ -17,8 +17,12 @@ { const char *const *tmp; + set->default_language = "english"; for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) { - if (strncmp(*tmp, "textcat_conf=", 13) == 0) { + if (strncmp(*tmp, "default_language=", 17) == 0) { + set->default_language = + p_strdup(user->pool, *tmp + 17); + } else if (strncmp(*tmp, "textcat_conf=", 13) == 0) { set->textcat_conf = p_strdup(user->pool, *tmp + 13); } else if (strncmp(*tmp, "textcat_dir=", 12) == 0) { set->textcat_dir = p_strdup(user->pool, *tmp + 12); @@ -35,6 +39,13 @@ i_error("fts_lucene: textcat_dir set, but textcat_conf unset"); return -1; } +#ifndef HAVE_LUCENE_STEMMER + if (set->default_language != NULL) { + i_error("fts_lucene: default_language set, " + "but Dovecot built without stemmer support"); + return -1; + } +#endif #ifndef HAVE_LUCENE_TEXTCAT if (set->textcat_conf != NULL) { i_error("fts_lucene: textcat_dir set, "
--- a/src/plugins/fts-lucene/fts-lucene-plugin.h Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/fts-lucene-plugin.h Wed Aug 24 21:07:04 2011 +0300 @@ -9,6 +9,7 @@ MODULE_CONTEXT(obj, fts_lucene_user_module) struct fts_lucene_settings { + const char *default_language; const char *textcat_conf, *textcat_dir; };
--- a/src/plugins/fts-lucene/lucene-wrapper.cc Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.cc Wed Aug 24 21:07:04 2011 +0300 @@ -12,6 +12,7 @@ #include "mail-namespace.h" #include "mail-storage.h" #include "fts-expunge-log.h" +#include "fts-lucene-plugin.h" #include "lucene-wrapper.h" #include <sys/stat.h> @@ -30,8 +31,6 @@ #define LUCENE_LOCK_OVERRIDE_SECS 60 -#define DEFAULT_LANGUAGE "english" - using namespace lucene::document; using namespace lucene::index; using namespace lucene::search; @@ -48,8 +47,8 @@ struct lucene_index { char *path; struct mailbox_list *list; + struct fts_lucene_settings set; - char *textcat_dir, *textcat_conf; wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1]; IndexReader *reader; @@ -90,8 +89,7 @@ struct lucene_index *lucene_index_init(const char *path, struct mailbox_list *list, - const char *textcat_dir, - const char *textcat_conf) + const struct fts_lucene_settings *set) { struct lucene_index *index; unsigned int len; @@ -99,17 +97,11 @@ index = i_new(struct lucene_index, 1); index->path = i_strdup(path); index->list = list; - if (textcat_dir != NULL) { - /* textcat really wants the '/' suffix */ - len = strlen(textcat_dir); - if (len > 0 && textcat_dir[len-1] != '/') - index->textcat_dir = i_strconcat(textcat_dir, "/", NULL); - else - index->textcat_dir = i_strdup(textcat_dir); - index->textcat_conf = i_strdup(textcat_conf); - } -#ifdef HAVE_LUCENE_TEXTCAT - index->default_analyzer = _CLNEW snowball::SnowballAnalyzer(DEFAULT_LANGUAGE); + if (set != NULL) + index->set = *set; +#ifdef HAVE_LUCENE_STEMMER + index->default_analyzer = + _CLNEW snowball::SnowballAnalyzer(set->default_language); #else index->default_analyzer = _CLNEW standard::StandardAnalyzer(); #endif @@ -143,8 +135,6 @@ textcat = NULL; } _CLDELETE(index->default_analyzer); - i_free(index->textcat_dir); - i_free(index->textcat_conf); i_free(index->path); i_free(index); } @@ -344,6 +334,7 @@ return 0; } +#ifdef HAVE_LUCENE_TEXTCAT static Analyzer *get_analyzer(struct lucene_index *index, const char *lang) { const struct lucene_analyzer *a; @@ -362,7 +353,22 @@ return new_analyzer.analyzer; } -#ifdef HAVE_LUCENE_TEXTCAT +static void *textcat_init(struct lucene_index *index) +{ + const char *textcat_dir = index->set.textcat_dir; + unsigned int len; + + if (textcat_dir == NULL) + return NULL; + + /* textcat really wants the '/' suffix */ + len = strlen(textcat_dir); + if (len > 0 && textcat_dir[len-1] != '/') + textcat_dir = t_strconcat(textcat_dir, "/", NULL); + + return special_textcat_Init(index->set.textcat_conf, textcat_dir); +} + static Analyzer * guess_analyzer(struct lucene_index *index, const void *data, size_t size) { @@ -372,9 +378,7 @@ return NULL; if (textcat == NULL) { - textcat = index->textcat_conf == NULL ? NULL : - special_textcat_Init(index->textcat_conf, - index->textcat_dir); + textcat = textcat_init(index); if (textcat == NULL) { textcat_broken = TRUE; return NULL; @@ -388,7 +392,7 @@ if (lang[0] != '[' || p == NULL) return NULL; lang = t_strdup_until(lang+1, p); - if (strcmp(lang, DEFAULT_LANGUAGE) == 0) + if (strcmp(lang, index->set.default_language) == 0) return index->default_analyzer; return get_analyzer(index, lang);
--- a/src/plugins/fts-lucene/lucene-wrapper.h Tue Aug 23 04:52:55 2011 +0300 +++ b/src/plugins/fts-lucene/lucene-wrapper.h Wed Aug 24 21:07:04 2011 +0300 @@ -7,6 +7,7 @@ struct hash_table; struct mailbox_list; struct fts_expunge_log; +struct fts_lucene_settings; #define MAILBOX_GUID_HEX_LENGTH (MAIL_GUID_128_SIZE*2) @@ -17,8 +18,7 @@ struct lucene_index *lucene_index_init(const char *path, struct mailbox_list *list, - const char *textcat_dir, - const char *textcat_conf); + const struct fts_lucene_settings *set); void lucene_index_deinit(struct lucene_index *index); void lucene_index_select_mailbox(struct lucene_index *index,