changeset 13186:b099ac847f86

fts-lucene: Added initial support for language detection and stemming. This requires textcat and snowball (libstemmer) libraries. This can be enabled by setting: fts_lucene = textcat_dir=/usr/share/libtextcat \ textcat_conf=/etc/dovecot/textcat.conf I'm not yet sure how good of an idea it is to actually use this, so it needs some testing.. For example how bad is it if language is detected incorrectly?
author Timo Sirainen <tss@iki.fi>
date Tue, 09 Aug 2011 15:05:18 +0300
parents 9d784aab6398
children 1de8c2e4adb2
files configure.in src/plugins/fts-lucene/Makefile.am src/plugins/fts-lucene/Snowball.cc src/plugins/fts-lucene/SnowballAnalyzer.h src/plugins/fts-lucene/SnowballFilter.h src/plugins/fts-lucene/fts-backend-lucene.c src/plugins/fts-lucene/fts-lucene-plugin.c src/plugins/fts-lucene/fts-lucene-plugin.h src/plugins/fts-lucene/lucene-wrapper.cc src/plugins/fts-lucene/lucene-wrapper.h
diffstat 10 files changed, 455 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Tue Aug 09 14:13:13 2011 +0300
+++ b/configure.in	Tue Aug 09 15:05:18 2011 +0300
@@ -2607,6 +2607,16 @@
 fi
 AM_CONDITIONAL(BUILD_SOLR, test "$have_solr" = "yes")
 
+if test "$want_lucene" = "yes"; then
+  AC_CHECK_LIB(textcat, special_textcat_Init, [
+    AC_CHECK_LIB(stemmer, sb_stemmer_new, [
+      have_lucene_textcat=yes
+      AC_DEFINE(HAVE_LUCENE_TEXTCAT,, Define if you want textcat and stemming support for CLucene)
+    ])
+  ])
+fi
+AM_CONDITIONAL(BUILD_LUCENE_TEXTCAT, test "$have_lucene_textcat" = "yes")
+
 dnl **
 dnl ** Settings
 dnl **
--- a/src/plugins/fts-lucene/Makefile.am	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/Makefile.am	Tue Aug 09 15:05:18 2011 +0300
@@ -12,14 +12,29 @@
 module_LTLIBRARIES = \
 	lib21_fts_lucene_plugin.la
 
+if BUILD_LUCENE_TEXTCAT
+TEXTCAT_LIBS = -lstemmer -ltextcat
+endif
+
 lib21_fts_lucene_plugin_la_LIBADD = \
-	-lclucene-shared -lclucene-core
+	-lclucene-shared -lclucene-core $(TEXTCAT_LIBS)
 
 lib21_fts_lucene_plugin_la_SOURCES = \
 	fts-lucene-plugin.c \
 	fts-backend-lucene.c \
-	lucene-wrapper.cc
+	lucene-wrapper.cc \
+	Snowball.cc
 
 noinst_HEADERS = \
 	fts-lucene-plugin.h \
-	lucene-wrapper.h
+	lucene-wrapper.h \
+	SnowballAnalyzer.h \
+	SnowballFilter.h
+
+if BUILD_LUCENE_TEXTCAT
+exampledir = $(docdir)/example-config
+example_DATA = \
+	textcat.conf
+else
+EXTRA_DIST = textcat.conf
+endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-lucene/Snowball.cc	Tue Aug 09 15:05:18 2011 +0300
@@ -0,0 +1,124 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#include <CLucene.h>
+#include "SnowballAnalyzer.h"
+#include "SnowballFilter.h"
+#include <CLucene/util/CLStreams.h>
+#include <CLucene/analysis/Analyzers.h>
+#include <CLucene/analysis/standard/StandardTokenizer.h>
+#include <CLucene/analysis/standard/StandardFilter.h>
+
+extern "C" {
+#include "lib.h"
+#include "buffer.h"
+#include "unichar.h"
+#include "lucene-wrapper.h"
+};
+
+CL_NS_USE(analysis)
+CL_NS_USE(util)
+CL_NS_USE2(analysis,standard)
+
+CL_NS_DEF2(analysis,snowball)
+
+  /** Builds the named analyzer with no stop words. */
+  SnowballAnalyzer::SnowballAnalyzer(const char* language) {
+    this->language = strdup(language);
+	stopSet = NULL;
+  }
+
+  SnowballAnalyzer::~SnowballAnalyzer(){
+	  free(language); /* allocated with strdup(), must be freed with free() */
+	  if ( stopSet != NULL )
+		  _CLDELETE(stopSet);
+  }
+
+  /** Builds the named analyzer with the given stop words.
+  */
+  SnowballAnalyzer::SnowballAnalyzer(const char* language, const TCHAR** stopWords) {
+    this->language = strdup(language);
+
+    stopSet = _CLNEW CLTCSetList(true);
+	StopFilter::fillStopTable(stopSet,stopWords);
+  }
+
+  TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) {
+	 return this->tokenStream(fieldName,reader,false);
+  }
+
+  /** Constructs a {@link StandardTokenizer} filtered by a {@link
+      StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
+  TokenStream* SnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader) {
+		BufferedReader* bufferedReader = reader->__asBufferedReader();
+		TokenStream* result;
+
+		if ( bufferedReader == NULL )
+			result =  _CLNEW StandardTokenizer( _CLNEW FilteredBufferedReader(reader, deleteReader), true );
+		else
+			result = _CLNEW StandardTokenizer(bufferedReader, deleteReader);
+
+	 result = _CLNEW StandardFilter(result, true);
+    result = _CLNEW CL_NS(analysis)::LowerCaseFilter(result, true);
+    if (stopSet != NULL)
+      result = _CLNEW CL_NS(analysis)::StopFilter(result, true, stopSet);
+    result = _CLNEW SnowballFilter(result, language, true);
+    return result;
+  }
+  
+  
+  
+  
+  
+  
+  
+    /** Construct the named stemming filter.
+   *
+   * @param in the input tokens to stem
+   * @param name the name of a stemmer
+   */
+	SnowballFilter::SnowballFilter(TokenStream* in, const char* language, bool deleteTS):
+		TokenFilter(in,deleteTS)
+	{
+		stemmer = sb_stemmer_new(language, NULL); //use utf8 encoding
+
+		if ( stemmer == NULL ){
+			_CLTHROWA(CL_ERR_IllegalArgument, "language not available for stemming\n"); //todo: richer error
+		}
+    }
+
+	SnowballFilter::~SnowballFilter(){
+		sb_stemmer_delete(stemmer);
+	}
+
+  /** Returns the next input Token, after being stemmed */
+  Token* SnowballFilter::next(Token* token){
+    if (input->next(token) == NULL)
+      return NULL;
+
+	unsigned char utf8text[LUCENE_MAX_WORD_LEN*5+1];
+	unsigned int len = I_MIN(LUCENE_MAX_WORD_LEN, token->termLength());
+
+	buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
+	i_assert(sizeof(wchar_t) == sizeof(unichar_t));
+	buffer_create_data(&buf, utf8text, sizeof(utf8text));
+	uni_ucs4_to_utf8((const unichar_t *)token->termBuffer(), len, &buf);
+
+    const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8text, buf.used);
+	if ( stemmed == NULL )
+		_CLTHROWA(CL_ERR_Runtime,"Out of memory");
+
+	int stemmedLen=sb_stemmer_length(stemmer);
+
+	unsigned int tchartext_size = uni_utf8_strlen_n(stemmed, stemmedLen) + 1;
+	TCHAR tchartext[tchartext_size];
+	lucene_utf8_n_to_tchar(stemmed,stemmedLen,tchartext,tchartext_size);
+	token->set(tchartext,token->startOffset(), token->endOffset(), token->type());
+	return token;
+  }
+
+
+CL_NS_END2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-lucene/SnowballAnalyzer.h	Tue Aug 09 15:05:18 2011 +0300
@@ -0,0 +1,44 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_analysis_snowball_analyser_
+#define _lucene_analysis_snowball_analyser_
+
+#include "CLucene/analysis/AnalysisHeader.h"
+
+CL_CLASS_DEF(util,BufferedReader)
+CL_NS_DEF2(analysis,snowball)
+
+/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
+ *
+ * Available stemmers are listed in {@link net.sf.snowball.ext}.  The name of a
+ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+ * {@link EnglishStemmer} is named "English".
+ */
+class CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer {
+  char* language;
+  CLTCSetList* stopSet;
+
+public:
+  /** Builds the named analyzer with no stop words. */
+  SnowballAnalyzer(const char* language="english");
+
+  /** Builds the named analyzer with the given stop words.
+  */
+  SnowballAnalyzer(const char* language, const TCHAR** stopWords);
+
+  ~SnowballAnalyzer();
+
+  /** Constructs a {@link StandardTokenizer} filtered by a {@link
+      StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
+  TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
+  TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader);
+};
+
+CL_NS_END2
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-lucene/SnowballFilter.h	Tue Aug 09 15:05:18 2011 +0300
@@ -0,0 +1,41 @@
+/*------------------------------------------------------------------------------
+* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
+*
+* Distributable under the terms of either the Apache License (Version 2.0) or
+* the GNU Lesser General Public License, as specified in the COPYING file.
+------------------------------------------------------------------------------*/
+#ifndef _lucene_analysis_snowball_filter_
+#define _lucene_analysis_snowball_filter_
+
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "libstemmer.h"
+
+CL_NS_DEF2(analysis,snowball)
+
+/** A filter that stems words using a Snowball-generated stemmer.
+ *
+ * Available stemmers are listed in {@link net.sf.snowball.ext}.  The name of a
+ * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
+ * {@link EnglishStemmer} is named "English".
+ *
+ * Note: todo: This is not thread safe...
+ */
+class CLUCENE_CONTRIBS_EXPORT SnowballFilter: public TokenFilter {
+	struct sb_stemmer * stemmer;
+public:
+
+  /** Construct the named stemming filter.
+   *
+   * @param in the input tokens to stem
+   * @param name the name of a stemmer
+   */
+	SnowballFilter(TokenStream* in, const char* language, bool deleteTS);
+
+	~SnowballFilter();
+
+    /** Returns the next input Token, after being stemmed */
+    Token* next(Token* token);
+};
+
+CL_NS_END2
+#endif
--- a/src/plugins/fts-lucene/fts-backend-lucene.c	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/fts-backend-lucene.c	Tue Aug 09 15:05:18 2011 +0300
@@ -102,15 +102,18 @@
 {
 	struct lucene_fts_backend *backend =
 		(struct lucene_fts_backend *)_backend;
-	struct mailbox_list *list = _backend->ns->list;
+	struct fts_lucene_user *fuser =
+		FTS_LUCENE_USER_CONTEXT(_backend->ns->user);
 	const char *path;
 
-	path = mailbox_list_get_path(list, NULL,
+	path = mailbox_list_get_path(_backend->ns->list, NULL,
 				     MAILBOX_LIST_PATH_TYPE_INDEX);
 	i_assert(path != NULL); /* fts already checked this */
 
 	backend->dir_path = i_strconcat(path, "/"LUCENE_INDEX_DIR_NAME, NULL);
-	backend->index = lucene_index_init(backend->dir_path);
+	backend->index = lucene_index_init(backend->dir_path,
+					   fuser->set.textcat_dir,
+					   fuser->set.textcat_conf);
 	return 0;
 }
 
--- a/src/plugins/fts-lucene/fts-lucene-plugin.c	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.c	Tue Aug 09 15:05:18 2011 +0300
@@ -1,19 +1,85 @@
 /* Copyright (c) 2006-2011 Dovecot authors, see the included COPYING file */
 
 #include "lib.h"
-#include "mail-storage-private.h"
+#include "mail-storage-hooks.h"
 #include "fts-lucene-plugin.h"
 
 const char *fts_lucene_plugin_version = DOVECOT_VERSION;
 
-unsigned int fts_lucene_storage_module_id;
+struct fts_lucene_user_module fts_lucene_user_module =
+	MODULE_CONTEXT_INIT(&mail_user_module_register);
+
+static int
+fts_lucene_plugin_init_settings(struct mail_user *user,
+				struct fts_lucene_settings *set,
+				const char *str)
+{
+	const char *const *tmp;
+
+	for (tmp = t_strsplit_spaces(str, " "); *tmp != NULL; tmp++) {
+		if (strncmp(*tmp, "textcat_conf=", 13) == 0) {
+			set->textcat_conf = p_strdup(user->pool, *tmp + 13);
+		} else if (strncmp(*tmp, "textcat_dir=", 12) == 0) {
+			set->textcat_dir = p_strdup(user->pool, *tmp + 12);
+		} else {
+			i_error("fts_lucene: Invalid setting: %s", *tmp);
+			return -1;
+		}
+	}
+	if (set->textcat_conf != NULL && set->textcat_dir == NULL) {
+		i_error("fts_lucene: textcat_conf set, but textcat_dir unset");
+		return -1;
+	}
+	if (set->textcat_conf == NULL && set->textcat_dir != NULL) {
+		i_error("fts_lucene: textcat_dir set, but textcat_conf unset");
+		return -1;
+	}
+#ifndef HAVE_LUCENE_TEXTCAT
+	if (set->textcat_conf != NULL) {
+		i_error("fts_lucene: textcat_conf set, "
+			"but Dovecot built without textcat support");
+		return -1;
+	}
+#endif
+	return 0;
+}
+
+static void fts_lucene_mail_user_create(struct mail_user *user, const char *env)
+{
+	struct fts_lucene_user *fuser;
+
+	fuser = p_new(user->pool, struct fts_lucene_user, 1);
+	if (fts_lucene_plugin_init_settings(user, &fuser->set, env) < 0) {
+		/* invalid settings, disabling */
+		return;
+	}
+
+	MODULE_CONTEXT_SET(user, fts_lucene_user_module, fuser);
+}
+
+static void fts_lucene_mail_user_created(struct mail_user *user)
+{
+	const char *env;
+
+	env = mail_user_plugin_getenv(user, "fts_lucene");
+	if (env != NULL)
+		fts_lucene_mail_user_create(user, env);
+}
+
+static struct mail_storage_hooks fts_lucene_mail_storage_hooks = {
+	.mail_user_created = fts_lucene_mail_user_created
+};
 
 void fts_lucene_plugin_init(struct module *module ATTR_UNUSED)
 {
 	fts_backend_register(&fts_backend_lucene);
+	mail_storage_hooks_add(module, &fts_lucene_mail_storage_hooks);
 }
 
 void fts_lucene_plugin_deinit(void)
 {
 	fts_backend_unregister(fts_backend_lucene.name);
+	mail_storage_hooks_remove(&fts_lucene_mail_storage_hooks);
 }
+
+const char *fts_lucene_plugin_dependencies[] = { "fts", NULL };
--- a/src/plugins/fts-lucene/fts-lucene-plugin.h	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/fts-lucene-plugin.h	Tue Aug 09 15:05:18 2011 +0300
@@ -1,9 +1,24 @@
 #ifndef FTS_LUCENE_PLUGIN_H
 #define FTS_LUCENE_PLUGIN_H
 
+#include "module-context.h"
+#include "mail-user.h"
 #include "fts-api-private.h"
 
+#define FTS_LUCENE_USER_CONTEXT(obj) \
+	MODULE_CONTEXT(obj, fts_lucene_user_module)
+
+struct fts_lucene_settings {
+	const char *textcat_conf, *textcat_dir;
+};
+
+struct fts_lucene_user {
+	union mail_user_module_context module_ctx;
+	struct fts_lucene_settings set;
+};
+
 extern struct fts_backend fts_backend_lucene;
+extern MODULE_CONTEXT_DEFINE(fts_lucene_user_module, &mail_user_module_register);
 
 void fts_lucene_plugin_init(struct module *module);
 void fts_lucene_plugin_deinit(void);
--- a/src/plugins/fts-lucene/lucene-wrapper.cc	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.cc	Tue Aug 09 15:05:18 2011 +0300
@@ -13,16 +13,22 @@
 
 #include <dirent.h>
 #include <sys/stat.h>
+#ifdef HAVE_LUCENE_TEXTCAT
+#include <libtextcat/textcat.h>
+#endif
 };
 #include <CLucene.h>
 #include <CLucene/util/CLStreams.h>
 #include <CLucene/search/MultiPhraseQuery.h>
+#include "SnowballAnalyzer.h"
 
 /* Lucene's default is 10000. Use it here also.. */
 #define MAX_TERMS_PER_DOCUMENT 10000
 
 #define LUCENE_LOCK_OVERRIDE_SECS 60
 
+#define DEFAULT_LANGUAGE "english"
+
 using namespace lucene::document;
 using namespace lucene::index;
 using namespace lucene::search;
@@ -31,26 +35,49 @@
 using namespace lucene::analysis;
 using namespace lucene::util;
 
+struct lucene_analyzer {
+	char *lang;
+	Analyzer *analyzer;
+};
+
 struct lucene_index {
 	char *path;
+	char *textcat_dir, *textcat_conf;
 	wchar_t mailbox_guid[MAILBOX_GUID_HEX_LENGTH + 1];
 
 	IndexReader *reader;
 	IndexWriter *writer;
 	IndexSearcher *searcher;
-	Analyzer *analyzer;
+
+	Analyzer *default_analyzer, *cur_analyzer;
+	ARRAY_DEFINE(analyzers, struct lucene_analyzer);
 
 	Document *doc;
 	uint32_t prev_uid;
 };
 
-struct lucene_index *lucene_index_init(const char *path)
+static void *textcat = NULL;
+static bool textcat_broken = FALSE;
+static int textcat_refcount = 0;
+
+struct lucene_index *lucene_index_init(const char *path,
+				       const char *textcat_dir,
+				       const char *textcat_conf)
 {
 	struct lucene_index *index;
 
 	index = i_new(struct lucene_index, 1);
 	index->path = i_strdup(path);
-	index->analyzer = _CLNEW standard::StandardAnalyzer();
+	index->textcat_dir = i_strdup(textcat_dir);
+	index->textcat_conf = i_strdup(textcat_conf);
+#ifdef HAVE_LUCENE_TEXTCAT
+	index->default_analyzer = _CLNEW snowball::SnowballAnalyzer(DEFAULT_LANGUAGE);
+#else
+	index->default_analyzer = _CLNEW standard::StandardAnalyzer();
+#endif
+	i_array_init(&index->analyzers, 32);
+	textcat_refcount++;
+
 	return index;
 }
 
@@ -63,15 +90,29 @@
 
 void lucene_index_deinit(struct lucene_index *index)
 {
+	struct lucene_analyzer *a;
+
 	lucene_index_close(index);
-	_CLDELETE(index->analyzer);
+	array_foreach_modifiable(&index->analyzers, a) {
+		i_free(a->lang);
+		_CLDELETE(a->analyzer);
+	}
+	array_free(&index->analyzers);
+	if (--textcat_refcount == 0 && textcat != NULL) {
+#ifdef HAVE_LUCENE_TEXTCAT
+		textcat_Done(textcat);
+#endif
+		textcat = NULL;
+	}
+	_CLDELETE(index->default_analyzer);
+	i_free(index->textcat_dir);
+	i_free(index->textcat_conf);
 	i_free(index->path);
 	i_free(index);
 }
 
-static void
-lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
-		       wchar_t *dest, size_t destsize)
+void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
+			    wchar_t *dest, size_t destsize)
 {
 	ARRAY_TYPE(unichars) dest_arr;
 	buffer_t buf = { 0, 0, { 0, 0, 0, 0, 0 } };
@@ -245,7 +286,8 @@
 	bool exists = IndexReader::indexExists(index->path);
 	try {
 		index->writer = _CLNEW IndexWriter(index->path,
-						   index->analyzer, !exists);
+						   index->default_analyzer,
+						   !exists);
 	} catch (CLuceneError &err) {
 		lucene_handle_error(index, err, "IndexWriter()");
 		return -1;
@@ -254,6 +296,64 @@
 	return 0;
 }
 
+static Analyzer *get_analyzer(struct lucene_index *index, const char *lang)
+{
+	const struct lucene_analyzer *a;
+	struct lucene_analyzer new_analyzer;
+	Analyzer *analyzer;
+
+	array_foreach(&index->analyzers, a) {
+		if (strcmp(a->lang, lang) == 0)
+			return a->analyzer;
+	}
+
+	memset(&new_analyzer, 0, sizeof(new_analyzer));
+	new_analyzer.lang = i_strdup(lang);
+	new_analyzer.analyzer = _CLNEW snowball::SnowballAnalyzer(lang);
+	array_append_i(&index->analyzers.arr, &new_analyzer, 1);
+	return new_analyzer.analyzer;
+}
+
+#ifdef HAVE_LUCENE_TEXTCAT
+static Analyzer *
+guess_analyzer(struct lucene_index *index, const void *data, size_t size)
+{
+	const char *lang;
+
+	if (textcat_broken)
+		return NULL;
+
+	if (textcat == NULL) {
+		textcat = index->textcat_conf == NULL ? NULL :
+			special_textcat_Init(index->textcat_conf,
+					     index->textcat_dir);
+		if (textcat == NULL) {
+			textcat_broken = TRUE;
+			return NULL;
+		}
+	}
+
+	/* try to guess the language */
+	lang = textcat_Classify(textcat, (const char *)data,
+				I_MIN(size, 500));
+	const char *p = strchr(lang, ']');
+	if (lang[0] != '[' || p == NULL)
+		return NULL;
+	lang = t_strdup_until(lang+1, p);
+	if (strcmp(lang, DEFAULT_LANGUAGE) == 0)
+		return index->default_analyzer;
+
+	return get_analyzer(index, lang);
+}
+#else
+static Analyzer *
+guess_analyzer(struct lucene_index *index ATTR_UNUSED,
+	       const void *data ATTR_UNUSED, size_t size ATTR_UNUSED)
+{
+	return NULL;
+}
+#endif
+
 static int lucene_index_build_flush(struct lucene_index *index)
 {
 	int ret = 0;
@@ -262,7 +362,10 @@
 		return 0;
 
 	try {
-		index->writer->addDocument(index->doc);
+		index->writer->addDocument(index->doc,
+					   index->cur_analyzer != NULL ?
+					   index->cur_analyzer :
+					   index->default_analyzer);
 	} catch (CLuceneError &err) {
 		lucene_handle_error(index, err, "IndexWriter::addDocument()");
 		ret = -1;
@@ -270,6 +373,7 @@
 
 	_CLDELETE(index->doc);
 	index->doc = NULL;
+	index->cur_analyzer = NULL;
 	return ret;
 }
 
@@ -307,6 +411,8 @@
 		if (fts_header_want_indexed(hdr_name))
 			index->doc->add(*_CLNEW Field(wname, dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
 	} else if (size > 0) {
+		if (index->cur_analyzer == NULL)
+			index->cur_analyzer = guess_analyzer(index, data, size);
 		index->doc->add(*_CLNEW Field(_T("body"), dest, Field::STORE_NO | Field::INDEX_TOKENIZED));
 	}
 	return 0;
@@ -453,7 +559,7 @@
 
 	IndexWriter *writer = NULL;
 	try {
-		writer = _CLNEW IndexWriter(index->path, index->analyzer, false);
+		writer = _CLNEW IndexWriter(index->path, index->default_analyzer, false);
 		writer->optimize();
 	} catch (CLuceneError &err) {
 		lucene_handle_error(index, err, "IndexWriter::optimize()");
@@ -562,7 +668,12 @@
 		 const TCHAR *key, const struct mail_search_arg *arg)
 {
 	const TCHAR *wvalue = t_lucene_utf8_to_tchar(arg->value.str);
-	return getFieldQuery(index->analyzer, key, wvalue, arg->fuzzy);
+	Analyzer *analyzer = guess_analyzer(index, arg->value.str,
+					    strlen(arg->value.str));
+	if (analyzer == NULL)
+		analyzer = index->default_analyzer;
+
+	return getFieldQuery(analyzer, key, wvalue, arg->fuzzy);
 }
 
 static bool
--- a/src/plugins/fts-lucene/lucene-wrapper.h	Tue Aug 09 14:13:13 2011 +0300
+++ b/src/plugins/fts-lucene/lucene-wrapper.h	Tue Aug 09 15:05:18 2011 +0300
@@ -2,10 +2,13 @@
 #define LUCENE_WRAPPER_H
 
 #include "fts-api-private.h"
+#include "mail-types.h"
 
 #define MAILBOX_GUID_HEX_LENGTH (MAIL_GUID_128_SIZE*2)
 
-struct lucene_index *lucene_index_init(const char *path);
+struct lucene_index *lucene_index_init(const char *path,
+				       const char *textcat_dir,
+				       const char *textcat_conf);
 void lucene_index_deinit(struct lucene_index *index);
 
 void lucene_index_select_mailbox(struct lucene_index *index,
@@ -34,4 +37,8 @@
 			      struct mail_search_arg *args, bool and_args,
 			      struct fts_multi_result *result);
 
+/* internal: */
+void lucene_utf8_n_to_tchar(const unsigned char *src, size_t srcsize,
+			    wchar_t *dest, size_t destsize);
+
 #endif