changeset 13282:c5bb5db9f541

fts-lucene: Added default_language setting and separated stemmer/textcat support.
author Timo Sirainen <tss@iki.fi>
date Wed, 24 Aug 2011 21:07:04 +0300
parents f0e415c46490
children 12b70c1819a9
files configure.in src/plugins/fts-lucene/Makefile.am src/plugins/fts-lucene/doveadm-fts-lucene.c src/plugins/fts-lucene/fts-backend-lucene.c src/plugins/fts-lucene/fts-lucene-plugin.c src/plugins/fts-lucene/fts-lucene-plugin.h src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h
diffstat 8 files changed, 58 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Tue Aug 23 04:52:55 2011 +0300
+++ b/configure.in	Wed Aug 24 21:07:04 2011 +0300
@@ -2609,13 +2609,16 @@
 AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes")
 
 if test "$want_lucene" = "yes"; then
-  AC_CHECK_LIB(textcat, special_textcat_Init, [
-    AC_CHECK_LIB(stemmer, sb_stemmer_new, [
+  AC_CHECK_LIB(stemmer, sb_stemmer_new, [
+    have_lucene_stemmer=yes
+    AC_DEFINE(HAVE_LUCENE_STEMMER,, Define if you want stemming support for CLucene)
+    AC_CHECK_LIB(textcat, special_textcat_Init, [
       have_lucene_textcat=yes
-      AC_DEFINE(HAVE_LUCENE_TEXTCAT,, Define if you want textcat and stemming support for CLucene)
+      AC_DEFINE(HAVE_LUCENE_TEXTCAT,, Define if you want textcat support for CLucene)
     ])
   ])
 fi
+AM_CONDITIONAL(BUILD_LUCENE_STEMMER, test "$have_lucene_stemmer" = "yes")
 AM_CONDITIONAL(BUILD_LUCENE_TEXTCAT, test "$have_lucene_textcat" = "yes")
 
 dnl **
@@ -2738,6 +2741,7 @@
 src/plugins/quota/Makefile
 src/plugins/imap-quota/Makefile
 src/plugins/snarf/Makefile
+src/plugins/stats/Makefile
 src/plugins/trash/Makefile
 src/plugins/virtual/Makefile
 src/plugins/zlib/Makefile
--- a/src/plugins/fts-lucene/Makefile.am	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/Makefile.am	Wed Aug 24 21:07:04 2011 +0300
@@ -15,13 +15,16 @@
 module_LTLIBRARIES = \
 	lib21_fts_lucene_plugin.la
 
+if BUILD_LUCENE_STEMMER
+STEMMER_LIBS = -lstemmer
+SHOWBALL_SOURCES = Snowball.cc
+endif
 if BUILD_LUCENE_TEXTCAT
-TEXTCAT_LIBS = -lstemmer -ltextcat
-SHOWBALL_SOURCES = Snowball.cc
+TEXTCAT_LIBS = -ltextcat
 endif
 
 lib21_fts_lucene_plugin_la_LIBADD = \
-	-lclucene-shared -lclucene-core $(TEXTCAT_LIBS)
+	-lclucene-shared -lclucene-core $(TEXTCAT_LIBS) $(STEMMER_LIBS)
 
 lib21_fts_lucene_plugin_la_SOURCES = \
 	fts-lucene-plugin.c \
--- a/src/plugins/fts-lucene/doveadm-fts-lucene.c	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/doveadm-fts-lucene.c	Wed Aug 24 21:07:04 2011 +0300
@@ -23,7 +23,7 @@
 	bool first = TRUE;
 
 	memset(&prev_guid, 0, sizeof(prev_guid));
-	index = lucene_index_init(argv[1], NULL, NULL, NULL);
+	index = lucene_index_init(argv[1], NULL, NULL);
 	iter = lucene_index_iter_init(index);
 	while ((rec = lucene_index_iter_next(iter)) != NULL) {
 		if (memcmp(prev_guid, rec->mailbox_guid,
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Wed Aug 24 21:07:04 2011 +0300
@@ -137,12 +137,11 @@
 	if (fuser != NULL) {
 		backend->index = lucene_index_init(backend->dir_path,
 						   _backend->ns->list,
-						   fuser->set.textcat_dir,
-						   fuser->set.textcat_conf);
+						   &fuser->set);
 	} else {
 		backend->index = lucene_index_init(backend->dir_path,
 						   _backend->ns->list,
-						   NULL, NULL);
+						   NULL);
 	}
 
 	path = t_strconcat(backend->dir_path, "/"LUCENE_EXPUNGE_LOG_NAME, NULL);
--- a/src/plugins/fts-lucene/fts-lucene-plugin.c	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.c	Wed Aug 24 21:07:04 2011 +0300
@@ -17,8 +17,12 @@
 {
 	const char *const *tmp;
 
+	set->default_language = "english";
 	for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) {
-		if (strncmp(*tmp, "textcat_conf=", 13) == 0) {
+		if (strncmp(*tmp, "default_language=", 17) == 0) {
+			set->default_language =
+				p_strdup(user->pool, *tmp + 17);
+		} else if (strncmp(*tmp, "textcat_conf=", 13) == 0) {
 			set->textcat_conf = p_strdup(user->pool, *tmp + 13);
 		} else if (strncmp(*tmp, "textcat_dir=", 12) == 0) {
 			set->textcat_dir = p_strdup(user->pool, *tmp + 12);
@@ -35,6 +39,13 @@
 		i_error("fts_lucene: textcat_dir set, but textcat_conf unset");
 		return -1;
 	}
+#ifndef HAVE_LUCENE_STEMMER
+	if (set->default_language != NULL) {
+		i_error("fts_lucene: default_language set, "
+			"but Dovecot built without stemmer support");
+		return -1;
+	}
+#endif
 #ifndef HAVE_LUCENE_TEXTCAT
 	if (set->textcat_conf != NULL) {
 		i_error("fts_lucene: textcat_dir set, "
--- a/src/plugins/fts-lucene/fts-lucene-plugin.h	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.h	Wed Aug 24 21:07:04 2011 +0300
@@ -9,6 +9,7 @@
 	MODULE_CONTEXT(obj, fts_lucene_user_module)
 
 struct fts_lucene_settings {
+	const char *default_language;
 	const char *textcat_conf, *textcat_dir;
 };
 
--- a/src/plugins/fts-lucene/lucene-wrapper.cc	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc	Wed Aug 24 21:07:04 2011 +0300
@@ -12,6 +12,7 @@
 #include "mail-namespace.h"
 #include "mail-storage.h"
 #include "fts-expunge-log.h"
+#include "fts-lucene-plugin.h"
 #include "lucene-wrapper.h"
 
 #include <sys/stat.h>
@@ -30,8 +31,6 @@
 
 #define LUCENE_LOCK_OVERRIDE_SECS 60
 
-#define DEFAULT_LANGUAGE "english"
-
 using namespace lucene::document;
 using namespace lucene::index;
 using namespace lucene::search;
@@ -48,8 +47,8 @@
 struct lucene_index {
 	char *path;
 	struct mailbox_list *list;
+	struct fts_lucene_settings set;
 
-	char *textcat_dir, *textcat_conf;
 	wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1];
 
 	IndexReader *reader;
@@ -90,8 +89,7 @@
 
 struct lucene_index *lucene_index_init(const char *path,
 				       struct mailbox_list *list,
-				       const char *textcat_dir,
-				       const char *textcat_conf)
+				       const struct fts_lucene_settings *set)
 {
 	struct lucene_index *index;
 	unsigned int len;
@@ -99,17 +97,11 @@
 	index = i_new(struct lucene_index, 1);
 	index->path = i_strdup(path);
 	index->list = list;
-	if (textcat_dir != NULL) {
-		/* textcat really wants the '/' suffix */
-		len = strlen(textcat_dir);
-		if (len > 0 && textcat_dir[len-1] != '/')
-			index->textcat_dir = i_strconcat(textcat_dir, "/", NULL);
-		else
-			index->textcat_dir = i_strdup(textcat_dir);
-		index->textcat_conf = i_strdup(textcat_conf);
-	}
-#ifdef HAVE_LUCENE_TEXTCAT
-	index->default_analyzer = _CLNEW snowball::SnowballAnalyzer(DEFAULT_LANGUAGE);
+	if (set != NULL)
+		index->set = *set;
+#ifdef HAVE_LUCENE_STEMMER
+	index->default_analyzer =
+		_CLNEW snowball::SnowballAnalyzer(set->default_language);
 #else
 	index->default_analyzer = _CLNEW standard::StandardAnalyzer();
 #endif
@@ -143,8 +135,6 @@
 		textcat = NULL;
 	}
 	_CLDELETE(index->default_analyzer);
-	i_free(index->textcat_dir);
-	i_free(index->textcat_conf);
 	i_free(index->path);
 	i_free(index);
 }
@@ -344,6 +334,7 @@
 	return 0;
 }
 
+#ifdef HAVE_LUCENE_TEXTCAT
 static Analyzer *get_analyzer(struct lucene_index *index, const char *lang)
 {
 	const struct lucene_analyzer *a;
@@ -362,7 +353,22 @@
 	return new_analyzer.analyzer;
 }
 
-#ifdef HAVE_LUCENE_TEXTCAT
+static void *textcat_init(struct lucene_index *index)
+{
+	const char *textcat_dir = index->set.textcat_dir;
+	unsigned int len;
+	/* returns special_textcat_Init()'s result, or NULL if no dir is set */
+	if (textcat_dir == NULL)
+		return NULL;
+
+	/* textcat really wants the '/' suffix; append it when missing */
+	len = strlen(textcat_dir);
+	if (len > 0 && textcat_dir[len-1] != '/')
+		textcat_dir = t_strconcat(textcat_dir, "/", NULL);
+
+	return special_textcat_Init(index->set.textcat_conf, textcat_dir);
+}
+
 static Analyzer *
 guess_analyzer(struct lucene_index *index, const void *data, size_t size)
 {
@@ -372,9 +378,7 @@
 		return NULL;
 
 	if (textcat == NULL) {
-		textcat = index->textcat_conf == NULL ? NULL :
-			special_textcat_Init(index->textcat_conf,
-					     index->textcat_dir);
+		textcat = textcat_init(index);
 		if (textcat == NULL) {
 			textcat_broken = TRUE;
 			return NULL;
@@ -388,7 +392,7 @@
 	if (lang[0] != '[' || p == NULL)
 		return NULL;
 	lang = t_strdup_until(lang+1, p);
-	if (strcmp(lang, DEFAULT_LANGUAGE) == 0)
+	if (strcmp(lang, index->set.default_language) == 0)
 		return index->default_analyzer;
 
 	return get_analyzer(index, lang);
--- a/src/plugins/fts-lucene/lucene-wrapper.h	Tue Aug 23 04:52:55 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.h	Wed Aug 24 21:07:04 2011 +0300
@@ -7,6 +7,7 @@
 struct hash_table;
 struct mailbox_list;
 struct fts_expunge_log;
+struct fts_lucene_settings;
 
 #define MAILBOX_GUID_HEX_LENGTH (MAIL_GUID_128_SIZE*2)
 
@@ -17,8 +18,7 @@
 
 struct lucene_index *lucene_index_init(const char *path,
 				       struct mailbox_list *list,
-				       const char *textcat_dir,
-				       const char *textcat_conf);
+				       const struct fts_lucene_settings *set);
 void lucene_index_deinit(struct lucene_index *index);
 
 void lucene_index_select_mailbox(struct lucene_index *index,