changeset 18549:ae0458c63761

fts: Create tokenizers differently. Create tokenizers earlier. Create separate tokenizers for search and indexing. Enable configuration of tokenizers. Add some helpers in fts-tokenizer.h api. Change tokenizer unit tests to match those changes. lib-fts: Refactor lib-fts settings a bit. Turned address tokenizer settings into "boolean" values. Changed have_parent to "no_parent" and added "search" setting. Added documentation in fts-tokenizer.h. Change unit tests accordingly.
author Teemu Huovila <teemu.huovila@dovecot.fi>
date Sat, 09 May 2015 11:02:22 +0300
parents abbd71252175
children cebe8be92034
files src/lib-fts/fts-tokenizer-address.c src/lib-fts/fts-tokenizer.c src/lib-fts/fts-tokenizer.h src/lib-fts/test-fts-tokenizer.c src/plugins/fts/fts-api-private.h src/plugins/fts/fts-build-mail.c src/plugins/fts/fts-plugin.c src/plugins/fts/fts-search-args.c src/plugins/fts/fts-storage.c src/plugins/fts/fts-user.c src/plugins/fts/fts-user.h
diffstat 11 files changed, 188 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 11:02:22 2015 +0300
@@ -5,8 +5,8 @@
 #include "buffer.h"
 #include "fts-tokenizer-private.h"
 
-/* Return not only our tokens, but also data for parent to process.*/
-#define FTS_DEFAULT_HAVE_PARENT 1
+#define FTS_DEFAULT_NO_PARENT FALSE
+#define FTS_DEFAULT_SEARCH FALSE
 
 enum email_address_parser_state {
 	EMAIL_ADDRESS_PARSER_STATE_NONE = 0,
@@ -21,8 +21,8 @@
 	string_t *last_word;
 	string_t *parent_data; /* Copy of input data between tokens.
 	                          TODO: could be buffer_t maybe */
-	unsigned int have_parent; /* Setting for stand-alone usage.
-	                             Might be superfluous. */
+	bool no_parent;
+	bool search;
 };
 
 /*
@@ -85,18 +85,17 @@
 				   const char **error_r)
 {
 	struct email_address_fts_tokenizer *tok;
-	unsigned int have_parent = FTS_DEFAULT_HAVE_PARENT;
+	bool no_parent = FTS_DEFAULT_NO_PARENT;
+	bool search = FTS_DEFAULT_SEARCH;
 	unsigned int i;
 
 	for (i = 0; settings[i] != NULL; i += 2) {
-		const char *key = settings[i], *value = settings[i+1];
+		const char *key = settings[i];
 
-		if (strcmp(key, "have_parent") == 0) {
-			if (str_to_uint(value, &have_parent) < 0 ) {
-				*error_r = t_strdup_printf(
-					"Invalid parent setting: %s", value);
-				return -1;
-			}
+		if (strcmp(key, "no_parent") == 0) {
+			no_parent = TRUE;
+		}else if (strcmp(key, "search") == 0) {
+			search = TRUE;
 		} else {
 			*error_r = t_strdup_printf("Unknown setting: %s", key);
 			return -1;
@@ -107,7 +106,8 @@
 	tok->tokenizer = *fts_tokenizer_email_address;
 	tok->last_word = str_new(default_pool, 128);
 	tok->parent_data = str_new(default_pool, 128);
-	tok->have_parent = have_parent;
+	tok->no_parent = no_parent;
+	tok->search = search;
 	*tokenizer_r = &tok->tokenizer;
 	return 0;
 }
@@ -134,6 +134,9 @@
 fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok)
 {
 	const char *ret;
+	/* TODO: search option removes address from data here. */
+	if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
+		i_debug("Would remove current token");
 
 	ret = t_strdup(str_c(tok->parent_data));
 	str_truncate(tok->parent_data, 0);
@@ -250,7 +253,7 @@
 fts_tokenizer_address_update_parent(struct email_address_fts_tokenizer *tok,
                                     const unsigned char *data, size_t size)
 {
-	if (tok->have_parent > 0)
+	if (!tok->no_parent)
 		str_append_n(tok->parent_data, data, size);
 }
 static const char *
@@ -273,7 +276,7 @@
 	/* end of data, output lingering tokens. first the parents data, then
 	   possibly our token, if complete enough */
 	if (size == 0) {
-		if (tok->have_parent > 0 && str_len(tok->parent_data) > 0)
+		if (!tok->no_parent && str_len(tok->parent_data) > 0)
 		    return fts_tokenizer_address_parent_data(tok);
 
 		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN
@@ -328,7 +331,7 @@
 			*skip_r = pos + local_skip;
 			fts_tokenizer_address_update_parent(tok, data+pos,
 			                                    local_skip);
-			if (tok->have_parent > 0)
+			if (!tok->no_parent)
 				return fts_tokenizer_address_parent_data(tok);
 			else {
 				return fts_tokenizer_address_current_token(tok);
--- a/src/lib-fts/fts-tokenizer.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c	Sat May 09 11:02:22 2015 +0300
@@ -10,6 +10,20 @@
 
 ARRAY(struct fts_tokenizer) fts_tokenizer_classes;
 
+void fts_tokenizers_init(void)
+{
+	if (!array_is_created(&fts_tokenizer_classes)) {
+		fts_tokenizer_register(fts_tokenizer_generic);
+		fts_tokenizer_register(fts_tokenizer_email_address);
+	}
+}
+
+void fts_tokenizers_deinit(void)
+{
+	if (array_is_created(&fts_tokenizer_classes))
+		array_free(&fts_tokenizer_classes);
+}
+
 /* private */
 void fts_tokenizer_register(const struct fts_tokenizer *tok_class)
 {
@@ -47,6 +61,11 @@
 	return NULL;
 }
 
+const char *fts_tokenizer_name(const struct fts_tokenizer *tok)
+{
+	return tok->name;
+}
+
 int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
 			 struct fts_tokenizer *parent,
 			 const char *const *settings,
--- a/src/lib-fts/fts-tokenizer.h	Sat May 09 10:53:25 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h	Sat May 09 11:02:22 2015 +0300
@@ -3,7 +3,9 @@
 
 /*
  Settings are given in the form of a const char * const *settings =
- {"key, "value", "key2", "value2", NULL} array of string pairs.
+ {"key", "value", "key2", "value2", NULL} array of string pairs. Some
+ keys, like "no_parent" and "search", are boolean flags: the value is
+ ignored (but must still be present) and mentioning the key enables it.
  The array has to be NULL terminated.
 */
 /* Email address header tokenizer that returns "user@domain.org" input as
@@ -13,15 +15,21 @@
    allows doing an explicit "user@domain" search, which returns only mails
    matching that exact address (instead of e.g. a mail with both user@domain2
    and user2@domain words). */
-/* Settings: "have_parent", Return not only our tokens, but also data
-   for parent to process. Defaults to 1. Should normally not need to
-   be changed. */
+/* Settings:
+   "no_parent": Return only our tokens, no data for parent to process.
+   Defaults to disabled. Should normally not be needed.
+
+   "search": Remove addresses from parent data stream, so they are not
+   processed further. Defaults to disabled. Enable by defining the keyword
+   (and any value). */
 extern const struct fts_tokenizer *fts_tokenizer_email_address;
 #define FTS_TOKENIZER_EMAIL_ADDRESS_NAME "email-address"
 
 /* Generic email content tokenizer. Cuts text into tokens. */
-/* Settings: "maxlen" Maximum length of token, before an arbitary cut
-   off is made. Defaults to FTS_DEFAULT_TOKEN_MAX_LENGTH.
+/* Settings:
+   "maxlen" Maximum length of token, before an arbitrary cut off is made.
+   Defaults to FTS_DEFAULT_TOKEN_MAX_LENGTH.
+
    "algorithm", accepted values are "simple" or "tr29". Defines the
    method for looking for word boundaries. Simple is faster and will
    work for many texts, especially those using latin alphabets, but
@@ -35,9 +43,18 @@
 extern const struct fts_tokenizer *fts_tokenizer_generic;
 #define FTS_TOKENIZER_GENERIC_NAME "generic"
 
+/*
+ Tokenizing workflow: find --> create --> next (repeatedly) --> unref.
+ Do fts_tokenizers_init() before first use and _deinit() after all done.
+ */
+
+/* Register all built-in tokenizers. */
+void fts_tokenizers_init(void);
+void fts_tokenizers_deinit(void);
+
 const struct fts_tokenizer *fts_tokenizer_find(const char *name);
 
-/* Create a new tokenizer. The settings is an array of key,value pairs. */
+/* Create a new tokenizer. The settings are described above. */
 int fts_tokenizer_create(const struct fts_tokenizer *tok_class,
 			 struct fts_tokenizer *parent,
 			 const char *const *settings,
@@ -57,4 +74,5 @@
 fts_tokenizer_next(struct fts_tokenizer *tok,
 		   const unsigned char *data, size_t size);
 
+const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
 #endif
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 11:02:22 2015 +0300
@@ -25,7 +25,7 @@
 	const char *token, *error;
 
 	test_begin("fts tokenizer generic simple");
-	fts_tokenizer_register(fts_tokenizer_generic);
+	fts_tokenizers_init();
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, NULL, &tok, &error) == 0);
 	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
@@ -38,7 +38,7 @@
 	}
 	test_assert(*eopp == NULL);
 	fts_tokenizer_unref(&tok);
-	fts_tokenizer_unregister(fts_tokenizer_generic);
+	fts_tokenizers_deinit();
 	test_end();
 }
 
@@ -267,7 +267,7 @@
 		"abc@example.com", "bar@example.org",
 		"foo@domain", "foo@domain", "bar@example.org", NULL
 	};
-	const char *const settings[] = {"have_parent", "0", NULL};
+	const char *const settings[] = {"no_parent", "foo", NULL};
 	struct fts_tokenizer *tok;
 	const char * const *eopp = expected_output;
 	const char *token, *error;
@@ -305,7 +305,7 @@
 		"abc@example.com", "bar@example.org",
 		"foo@domain", NULL
 	};
-	const char *const settings[] = {"have_parent", "0", NULL};
+	const char *const settings[] = {"no_parent", "0", NULL};
 	struct fts_tokenizer *tok;
 	const char * const *eopp = expected_output;
 	const char *token, *error;
@@ -346,7 +346,7 @@
 	struct fts_tokenizer *tok;
 	const char * const *eopp = expected_output;
 	const char *token, *error;
-	const char *const settings[] = {"have_parent", "0", NULL};
+	const char *const settings[] = {"no_parent", "abc", NULL};
 	unsigned int i, step, step_max = 10;
 
 	test_begin("fts tokenizer email address, input random length");
@@ -390,8 +390,7 @@
 	unsigned int i;
 
 	test_begin("fts tokenizer email address + parent, input one character at a time");
-	fts_tokenizer_register(fts_tokenizer_generic);
-	fts_tokenizer_register(fts_tokenizer_email_address);
+	fts_tokenizers_init();
 
 	test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
@@ -411,8 +410,7 @@
 	test_assert(*eopp == NULL);
 	fts_tokenizer_unref(&tok);
 	fts_tokenizer_unref(&gen_tok);
-	fts_tokenizer_unregister(fts_tokenizer_generic);
-	fts_tokenizer_unregister(fts_tokenizer_email_address);
+	fts_tokenizers_deinit();
 	test_end();
 }
 
@@ -437,8 +435,7 @@
 	unsigned int i;
 
 	test_begin("fts tokenizer email address + parent, input one line at a time");
-	fts_tokenizer_register(fts_tokenizer_generic);
-	fts_tokenizer_register(fts_tokenizer_email_address);
+	fts_tokenizers_init();
 
 	test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
@@ -457,8 +454,7 @@
 	test_assert(*eopp == NULL);
 	fts_tokenizer_unref(&tok);
 	fts_tokenizer_unref(&gen_tok);
-	fts_tokenizer_unregister(fts_tokenizer_generic);
-	fts_tokenizer_unregister(fts_tokenizer_email_address);
+	fts_tokenizers_deinit();
 	test_end();
 
 }
--- a/src/plugins/fts/fts-api-private.h	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-api-private.h	Sat May 09 11:02:22 2015 +0300
@@ -76,8 +76,6 @@
 	struct fts_backend_vfuncs v;
 	struct mail_namespace *ns;
 
-	struct fts_tokenizer *tokenizer;
-
 	unsigned int updating:1;
 };
 
--- a/src/plugins/fts/fts-build-mail.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-build-mail.c	Sat May 09 11:02:22 2015 +0300
@@ -241,9 +241,11 @@
 fts_build_add_tokens_with_filter(struct fts_mail_build_context *ctx,
 				 const unsigned char *data, size_t size)
 {
-	struct fts_tokenizer *tokenizer = ctx->update_ctx->backend->tokenizer;
+	struct fts_tokenizer *tokenizer;
 	struct fts_filter *filter = ctx->cur_user_lang->filter;
 	const char *token;
+
+	tokenizer = fts_user_get_index_tokenizer(ctx->update_ctx->backend->ns->user);
 	while ((token = fts_tokenizer_next(tokenizer, data, size)) != NULL) {
 		if (filter != NULL) {
 			token = fts_filter_filter(filter, token);
@@ -311,7 +313,7 @@
 	} else {
 		ctx->cur_user_lang = fts_user_language_find(user, lang);
 		i_assert(ctx->cur_user_lang != NULL);
-		
+
 		if (ctx->pending_input->used > 0) {
 			if (fts_build_add_tokens_with_filter(ctx,
 					ctx->pending_input->data,
--- a/src/plugins/fts/fts-plugin.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-plugin.c	Sat May 09 11:02:22 2015 +0300
@@ -3,6 +3,7 @@
 #include "lib.h"
 #include "mail-storage-hooks.h"
 #include "fts-filter.h"
+#include "fts-tokenizer.h"
 #include "fts-parser.h"
 #include "fts-storage.h"
 #include "fts-user.h"
@@ -21,12 +22,14 @@
 void fts_plugin_init(struct module *module)
 {
 	fts_filters_init();
+	fts_tokenizers_init();
 	mail_storage_hooks_add(module, &fts_mail_storage_hooks);
 }
 
 void fts_plugin_deinit(void)
 {
 	fts_filters_deinit();
+	fts_tokenizers_deinit();
 	fts_parsers_unload();
 	mail_storage_hooks_remove(&fts_mail_storage_hooks);
 }
--- a/src/plugins/fts/fts-search-args.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-search-args.c	Sat May 09 11:02:22 2015 +0300
@@ -95,8 +95,10 @@
 	struct mail_search_arg *and_arg, *orig_arg = *argp;
 	const char *token, *orig_token = orig_arg->value.str;
 	unsigned int orig_token_len = strlen(orig_token);
+	struct fts_tokenizer *tokenizer;
 
 	languages = fts_user_get_all_languages(backend->ns->user);
+	tokenizer = fts_user_get_search_tokenizer(backend->ns->user);
 
 	/* we want all the tokens found from the string to be found, so create
 	   a parent AND and place all the filtered token alternatives under
@@ -107,14 +109,14 @@
 	and_arg->next = orig_arg->next;
 	*argp = and_arg;
 
-	while ((token = fts_tokenizer_next(backend->tokenizer,
+	while ((token = fts_tokenizer_next(tokenizer,
 					   (const void *)orig_token,
 					   orig_token_len)) != NULL) {
 		fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
 						       orig_arg, orig_token,
 						       token);
 	}
-	while ((token = fts_tokenizer_next(backend->tokenizer, NULL, 0)) != NULL) {
+	while ((token = fts_tokenizer_next(tokenizer, NULL, 0)) != NULL) {
 		fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
 						       orig_arg, orig_token,
 						       token);
@@ -151,7 +153,6 @@
 int fts_search_args_expand(struct fts_backend *backend,
 			   struct mail_search_args *args)
 {
-
 	fts_search_args_expand_tree(backend, args->pool, &args->args);
 
 	/* we'll need to re-simplify the args if we changed anything */
--- a/src/plugins/fts/fts-storage.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-storage.c	Sat May 09 11:02:22 2015 +0300
@@ -738,23 +738,11 @@
 {
 	struct fts_mailbox_list *flist = FTS_LIST_CONTEXT(list);
 
-	if (flist->backend->tokenizer != NULL)
-		fts_tokenizer_unref(&flist->backend->tokenizer);
 	fts_backend_deinit(&flist->backend);
 	flist->module_ctx.super.deinit(list);
 }
 
-static int fts_backend_init_libfts(struct fts_backend *backend)
-{
-	const char *error;
 
-	if (fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL,
-				 &backend->tokenizer, &error) < 0) {
-		i_error("Failed to initialize fts tokenizer: %s", error);
-		return -1;
-	}
-	return 0;
-}
 
 static void
 fts_mailbox_list_init(struct mailbox_list *list, const char *name)
@@ -773,8 +761,6 @@
 	if (fts_backend_init(name, list->ns, &error, &backend) < 0) {
 		i_error("fts: Failed to initialize backend '%s': %s",
 			name, error);
-	} else if (fts_backend_init_libfts(backend) < 0) {
-		fts_backend_deinit(&backend);
 	} else {
 		struct fts_mailbox_list *flist;
 		struct mailbox_list_vfuncs *v = list->vlast;
--- a/src/plugins/fts/fts-user.c	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-user.c	Sat May 09 11:02:22 2015 +0300
@@ -5,6 +5,7 @@
 #include "mail-user.h"
 #include "fts-language.h"
 #include "fts-filter.h"
+#include "fts-tokenizer.h"
 #include "fts-user.h"
 
 #define FTS_USER_CONTEXT(obj) \
@@ -14,6 +15,7 @@
 	union mail_user_module_context module_ctx;
 
 	struct fts_language_list *lang_list;
+	struct fts_tokenizer *index_tokenizer, *search_tokenizer;
 	ARRAY_TYPE(fts_user_language) languages;
 };
 
@@ -114,6 +116,85 @@
 	return 0;
 }
 
+static int
+fts_user_create_tokenizer(struct mail_user *user,
+			  struct fts_tokenizer **tokenizer_r, bool search,
+			  const char **error_r)
+{
+	const struct fts_tokenizer *tokenizer_class;
+	struct fts_tokenizer *tokenizer = NULL, *parent = NULL;
+	const char *tokenizers_key, *const *tokenizers;
+	const char *str, *error, *set_key, *const *settings;
+	unsigned int i;
+	int ret = 0;
+
+	tokenizers_key = "fts_tokenizers";
+	str = mail_user_plugin_getenv(user, tokenizers_key);
+	if (str == NULL)
+		str = "generic email-address"; /* default tokenizers */
+
+	tokenizers = t_strsplit_spaces(str, " ");
+
+	for (i = 0; tokenizers[i] != NULL; i++) {
+		tokenizer_class = fts_tokenizer_find(tokenizers[i]);
+		if (tokenizer_class == NULL) {
+			*error_r = t_strdup_printf("%s: Unknown tokenizer '%s'",
+						   tokenizers_key, tokenizers[i]);
+			ret = -1;
+			break;
+		}
+
+		set_key = t_strdup_printf("fts_tokenizers_%s", tokenizers[i]);
+		str = mail_user_plugin_getenv(user, set_key);
+
+		/* If the email-address tokenizer is included in the search
+		   tokenizer, add a setting. */
+		if (search && strcmp(fts_tokenizer_name(tokenizer_class),
+		                     FTS_TOKENIZER_EMAIL_ADDRESS_NAME) == 0) {
+			if (str == NULL)
+				str = "search yes";
+			else
+				str = t_strconcat(str, " search yes", NULL);
+		}
+
+		settings = str == NULL ? NULL : t_strsplit_spaces(str, " ");
+
+		if (fts_tokenizer_create(tokenizer_class, parent, settings,
+				      &tokenizer, &error) < 0) {
+			*error_r = t_strdup_printf(
+				"Tokenizer '%s' init via settings '%s' failed: %s",
+				tokenizers[i], set_key, error);
+			ret = -1;
+			break;
+		}
+		if (parent != NULL)
+			fts_tokenizer_unref(&parent);
+		parent = tokenizer;
+	}
+	if (ret < 0) {
+		if (parent != NULL)
+			fts_tokenizer_unref(&parent);
+		return -1;
+	}
+	*tokenizer_r = tokenizer;
+	return 0;
+}
+
+static int fts_user_init_tokenizers(struct mail_user *user,
+				    struct fts_user *fuser,
+				    const char **error_r)
+{
+	if (fts_user_create_tokenizer(user, &fuser->index_tokenizer, FALSE,
+	                              error_r) < 0)
+		return -1;
+
+	if (fts_user_create_tokenizer(user, &fuser->search_tokenizer, TRUE,
+	                              error_r) < 0)
+		return -1;
+
+	return 0;
+}
+
 struct fts_user_language *
 fts_user_language_find(struct mail_user *user,
 		       const struct fts_language *lang)
@@ -128,6 +209,20 @@
 	return NULL;
 }
 
+struct fts_tokenizer *fts_user_get_index_tokenizer(struct mail_user *user)
+{
+	struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+	return fuser->index_tokenizer;
+}
+
+struct fts_tokenizer *fts_user_get_search_tokenizer(struct mail_user *user)
+{
+	struct fts_user *fuser = FTS_USER_CONTEXT(user);
+
+	return fuser->search_tokenizer;
+}
+
 static int fts_user_language_create(struct mail_user *user,
                                     struct fts_user *fuser,
 				    const struct fts_language *lang,
@@ -185,12 +280,16 @@
 		if ((*user_langp)->filter != NULL)
 			fts_filter_unref(&(*user_langp)->filter);
 	}
+
+	if (fuser->index_tokenizer != NULL)
+		fts_tokenizer_unref(&fuser->index_tokenizer);
+	if (fuser->search_tokenizer != NULL)
+		fts_tokenizer_unref(&fuser->search_tokenizer);
 }
 
 int fts_mail_user_init(struct mail_user *user, const char **error_r)
 {
 	struct fts_user *fuser;
-	const char *error;
 
 	fuser = p_new(user->pool, struct fts_user, 1);
 	p_array_init(&fuser->languages, user->pool, 4);
@@ -199,11 +298,12 @@
 		fts_user_free(fuser);
 		return -1;
 	}
-	if (fts_user_languages_fill_all(user, fuser, &error) < 0) {
-		i_error("fts_dovecot: Failed to initialize languages: %s", error);
+	if (fts_user_languages_fill_all(user, fuser, error_r) < 0 ||
+	    fts_user_init_tokenizers(user, fuser, error_r) < 0) {
 		fts_user_free(fuser);
 		return -1;
 	}
+
 	MODULE_CONTEXT_SET(user, fts_user_module, fuser);
 	return 0;
 }
--- a/src/plugins/fts/fts-user.h	Sat May 09 10:53:25 2015 +0300
+++ b/src/plugins/fts/fts-user.h	Sat May 09 11:02:22 2015 +0300
@@ -10,7 +10,8 @@
 struct fts_user_language *
 fts_user_language_find(struct mail_user *user,
                        const struct fts_language *lang);
-
+struct fts_tokenizer *fts_user_get_index_tokenizer(struct mail_user *user);
+struct fts_tokenizer *fts_user_get_search_tokenizer(struct mail_user *user);
 struct fts_language_list *fts_user_get_language_list(struct mail_user *user);
 const ARRAY_TYPE(fts_user_language) *
 fts_user_get_all_languages(struct mail_user *user);