changeset 18551:7fe766887394

fts: Change tokenizer API to be able to return errors. Modify fts_tokenizer_next() to return integer status codes: it returns 1 if a token was returned in *token_r, 0 if more input is needed, and -1 on error.
author Teemu Huovila <teemu.huovila@dovecot.fi>
date Sat, 09 May 2015 11:05:04 +0300
parents cebe8be92034
children 95a827d97e5b
files src/lib-fts/fts-tokenizer-address.c src/lib-fts/fts-tokenizer-generic.c src/lib-fts/fts-tokenizer-private.h src/lib-fts/fts-tokenizer.c src/lib-fts/fts-tokenizer.h src/lib-fts/test-fts-tokenizer.c src/plugins/fts/fts-build-mail.c src/plugins/fts/fts-search-args.c
diffstat 8 files changed, 144 insertions(+), 124 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 11:05:04 2015 +0300
@@ -122,25 +122,27 @@
 	i_free(tok);
 }
 
-static const char *
-fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok)
+static int
+fts_tokenizer_address_current_token(struct email_address_fts_tokenizer *tok,
+                                    const char **token_r)
 {
 	tok->tokenizer.skip_parents = TRUE;
 	tok->state = EMAIL_ADDRESS_PARSER_STATE_NONE;
-	return t_strdup(str_c(tok->last_word));
+	*token_r = t_strdup(str_c(tok->last_word));
+	return 1;
 }
 
-static const char *
-fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok)
+static int
+fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok,
+                                  const char **token_r)
 {
-	const char *ret;
 	/* TODO: search option removes address from data here. */
 	if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
 		i_debug("Would remove current token");
 
-	ret = t_strdup(str_c(tok->parent_data));
+	*token_r = t_strdup(str_c(tok->parent_data));
 	str_truncate(tok->parent_data, 0);
-	return ret;
+	return 1;
 }
 
 /* Used to rewind past characters that can not be the start of a new localpart.
@@ -256,10 +258,10 @@
 	if (!tok->no_parent)
 		str_append_n(tok->parent_data, data, size);
 }
-static const char *
+static int
 fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
-				 const unsigned char *data, size_t size,
-				 size_t *skip_r)
+                                 const unsigned char *data, size_t size,
+                                 size_t *skip_r, const char **token_r)
 {
 	struct email_address_fts_tokenizer *tok =
 		(struct email_address_fts_tokenizer *)_tok;
@@ -270,18 +272,18 @@
 
 	if (tok->state == EMAIL_ADDRESS_PARSER_STATE_COMPLETE) {
 		*skip_r = pos;
-		return fts_tokenizer_address_current_token(tok);
+		return fts_tokenizer_address_current_token(tok, token_r);
 	}
 
 	/* end of data, output lingering tokens. first the parents data, then
 	   possibly our token, if complete enough */
 	if (size == 0) {
 		if (!tok->no_parent && str_len(tok->parent_data) > 0)
-		    return fts_tokenizer_address_parent_data(tok);
+			return fts_tokenizer_address_parent_data(tok, token_r);
 
 		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN
 		    && chars_after_at(tok) > 0)
-			return fts_tokenizer_address_current_token(tok);
+			return fts_tokenizer_address_current_token(tok, token_r);
 	}
 
 	/* 1) regular input data OR
@@ -332,9 +334,9 @@
 			fts_tokenizer_address_update_parent(tok, data+pos,
 			                                    local_skip);
 			if (!tok->no_parent)
-				return fts_tokenizer_address_parent_data(tok);
+				return fts_tokenizer_address_parent_data(tok, token_r);
 			else {
-				return fts_tokenizer_address_current_token(tok);
+				return fts_tokenizer_address_current_token(tok, token_r);
 			}
 		default:
 			i_unreached();
@@ -342,7 +344,7 @@
 
 	}
 	*skip_r = pos;
-	return NULL;
+	return 0;
 }
 
 static const struct fts_tokenizer_vfuncs email_address_tokenizer_vfuncs = {
--- a/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Sat May 09 11:05:04 2015 +0300
@@ -82,14 +82,13 @@
 	i_free(tok);
 }
 
-static const char *
-fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok)
+static int
+fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
+                                           const char **token_r)
 {
-	const char *ret;
-
-	ret = t_strndup(tok->token->data, tok->token->used);
+	*token_r = t_strndup(tok->token->data, tok->token->used);
 	buffer_set_used_size(tok->token, 0);
-	return ret;
+	return 1;
 }
 
 /* TODO: This is duplicated from unichar.c */
@@ -135,10 +134,10 @@
 	return is_word_break(c);
 }
 
-static const char *
+static int
 fts_tokenizer_generic_next_simple(struct fts_tokenizer *_tok,
-			   const unsigned char *data, size_t size,
-			   size_t *skip_r)
+                                  const unsigned char *data, size_t size,
+                                  size_t *skip_r, const char **token_r)
 {
 	struct generic_fts_tokenizer *tok =
 		(struct generic_fts_tokenizer *)_tok;
@@ -157,7 +156,7 @@
 			}
 			/* word boundary found - return a new token */
 			*skip_r = i + 1;
-			return fts_tokenizer_generic_simple_current_token(tok);
+			return fts_tokenizer_generic_simple_current_token(tok, token_r);
 		}
 	}
 	/* word boundary not found yet */
@@ -168,9 +167,9 @@
 
 	if (size == 0 && tok->token->used > 0) {
 		/* return the last token */
-		return fts_tokenizer_generic_simple_current_token(tok);
+		return fts_tokenizer_generic_simple_current_token(tok, token_r);
 	}
-	return NULL;
+	return 0;
 }
 
 /* TODO: Arrange array searches roughly in order of likelyhood of a match.
@@ -464,20 +463,20 @@
 
 	return FALSE;
 }
-static const char *
-fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok)
+static int
+fts_tokenizer_generic_tr29_current_token(struct generic_fts_tokenizer *tok,
+                                         const char **token_r)
 {
-	const char *ret;
 	size_t end_skip = 0;
 
 	if (is_one_past_end(tok))
 		end_skip = tok->last_size;
 
-	ret = t_strndup(tok->token->data, tok->token->used - end_skip);
+	*token_r = t_strndup(tok->token->data, tok->token->used - end_skip);
 	buffer_set_used_size(tok->token, 0);
 	tok->prev_prev_letter = LETTER_TYPE_NONE;
 	tok->prev_letter = LETTER_TYPE_NONE;
-	return ret;
+	return 1;
 }
 /*
   Find word boundaries in input text. Based on Unicode standard annex
@@ -516,10 +515,10 @@
 	return FALSE;
 }
 
-static const char *
+static int
 fts_tokenizer_generic_next_tr29(struct fts_tokenizer *_tok,
 			   const unsigned char *data, size_t size,
-			   size_t *skip_r)
+                                size_t *skip_r, const char **token_r)
 {
 	struct generic_fts_tokenizer *tok =
 		(struct generic_fts_tokenizer *)_tok;
@@ -547,7 +546,7 @@
 			buffer_append(tok->token, data + start_skip,
 			              len - start_skip);
 			*skip_r = i + 1;
-			return fts_tokenizer_generic_tr29_current_token(tok);
+			return fts_tokenizer_generic_tr29_current_token(tok, token_r);
 		}
 	}
 	len =  I_MIN(i, tok->max_length);
@@ -558,16 +557,17 @@
 	if (size == 0 && tok->token->used > 0) {
 		/* return the last token */
 		*skip_r = 0;
-		return fts_tokenizer_generic_tr29_current_token(tok);
+		return fts_tokenizer_generic_tr29_current_token(tok, token_r);
 	}
-	return NULL;
+	return 0;
 }
 
-static const char *
+static int
 fts_tokenizer_generic_next(struct fts_tokenizer *_tok ATTR_UNUSED,
 			   const unsigned char *data ATTR_UNUSED,
                            size_t size ATTR_UNUSED,
-			   size_t *skip_r ATTR_UNUSED)
+                           size_t *skip_r ATTR_UNUSED,
+                           const char **token_r ATTR_UNUSED)
 {
 	i_unreached();
 }
--- a/src/lib-fts/fts-tokenizer-private.h	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-private.h	Sat May 09 11:05:04 2015 +0300
@@ -10,9 +10,8 @@
 		      struct fts_tokenizer **tokenizer_r, const char **error_r);
 	void (*destroy)(struct fts_tokenizer *tok);
 
-	const char *(*next)(struct fts_tokenizer *tok,
-			    const unsigned char *data, size_t size,
-			    size_t *skip_r);
+	int (*next)(struct fts_tokenizer *tok, const unsigned char *data,
+	            size_t size, size_t *skip_r, const char **token_r);
 };
 
 enum fts_tokenizer_parent_state {
--- a/src/lib-fts/fts-tokenizer.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c	Sat May 09 11:05:04 2015 +0300
@@ -120,11 +120,12 @@
 	tok->v->destroy(tok);
 }
 
-static const char *
+static int
 fts_tokenizer_next_self(struct fts_tokenizer *tok,
-			const unsigned char *data, size_t size)
+                        const unsigned char *data, size_t size,
+                        const char **token_r)
 {
-	const char *token;
+	int ret = 0;
 	size_t skip = 0;
 
 	i_assert(tok->prev_reply_finished ||
@@ -132,60 +133,60 @@
 
 	if (tok->prev_reply_finished) {
 		/* whole new data */
-		token = tok->v->next(tok, data, size, &skip);
+		ret = tok->v->next(tok, data, size, &skip, token_r);
 	} else {
 		/* continuing previous data */
 		i_assert(tok->prev_skip <= size);
-		token = tok->v->next(tok, data + tok->prev_skip,
-				     size - tok->prev_skip, &skip);
+		ret = tok->v->next(tok, data + tok->prev_skip,
+		                   size - tok->prev_skip, &skip, token_r);
 	}
 
-	if (token != NULL) {
+	if (ret > 0) {
 		i_assert(skip <= size - tok->prev_skip);
 		tok->prev_data = data;
 		tok->prev_size = size;
 		tok->prev_skip = tok->prev_skip + skip;
 		tok->prev_reply_finished = FALSE;
-	} else {
+	} else if (ret == 0) {
 		/* we need a new data block */
 		tok->prev_data = NULL;
 		tok->prev_size = 0;
 		tok->prev_skip = 0;
 		tok->prev_reply_finished = TRUE;
 	}
-	return token;
+	return ret;
 }
 
-const char *
+int
 fts_tokenizer_next(struct fts_tokenizer *tok,
-		   const unsigned char *data, size_t size)
+                   const unsigned char *data, size_t size, const char **token_r)
 {
-	const char *token;
+	int ret;
 
 	switch (tok->parent_state) {
 	case FTS_TOKENIZER_PARENT_STATE_ADD_DATA:
-		token = fts_tokenizer_next_self(tok, data, size);
-		if (token == NULL || tok->parent == NULL || tok->skip_parents)
-			return token;
+		ret = fts_tokenizer_next_self(tok, data, size, token_r);
+		if (ret <= 0 || tok->parent == NULL || tok->skip_parents)
+			return ret;
 		buffer_set_used_size(tok->parent_input, 0);
-		buffer_append(tok->parent_input, token, strlen(token));
+		buffer_append(tok->parent_input, *token_r, strlen(*token_r));
 		tok->parent_state++;
 		/* fall through */
 	case FTS_TOKENIZER_PARENT_STATE_NEXT_OUTPUT:
-		token = fts_tokenizer_next(tok->parent, tok->parent_input->data,
-					   tok->parent_input->used);
-		if (token != NULL)
-			return token;
+		ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
+		                         tok->parent_input->used, token_r);
+		if (ret != 0)
+			return ret;
 		tok->parent_state++;
 		/* fall through */
 	case FTS_TOKENIZER_PARENT_STATE_FINALIZE:
-		token = fts_tokenizer_next(tok->parent, NULL, 0);
-		if (token != NULL)
-			return token;
+		ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r);
+		if (ret != 0)
+			return ret;
 		/* we're finished sending this token to parent tokenizer.
 		   see if our own tokenizer has more tokens available */
 		tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
-		return fts_tokenizer_next(tok, data, size);
+		return fts_tokenizer_next(tok, data, size, token_r);
 	default:
 		i_unreached();
 	}
--- a/src/lib-fts/fts-tokenizer.h	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h	Sat May 09 11:05:04 2015 +0300
@@ -63,16 +63,23 @@
 void fts_tokenizer_ref(struct fts_tokenizer *tok);
 void fts_tokenizer_unref(struct fts_tokenizer **tok);
 
-/* Returns the next token, or NULL if more data is needed for the next token.
-   This function should be called with the same data+size until it returns
-   NULL. When the input is finished, this function should be still be called
-   with size=0 to flush out the final token(s).
+/*
+   Returns 1 if token was returned, 0 if input was non-blocking and
+   more data is needed, -1 if EOF/error.
+
+   The next token is stored in *token_r only when 1 is returned; for
+   other return values *token_r must not be used.
+
+   This function should be called with the same data+size until it
+   returns 0. When the input is finished, this function should
+   still be called with size=0 to flush out the final token(s).
 
    data must contain only valid complete UTF-8 sequences, but otherwise it
    may be broken into however small pieces. */
-const char *
+
+int
 fts_tokenizer_next(struct fts_tokenizer *tok,
-		   const unsigned char *data, size_t size);
+                   const unsigned char *data, size_t size, const char **token_r);
 
 const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
 #endif
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 11:05:04 2015 +0300
@@ -28,11 +28,11 @@
 	fts_tokenizers_init();
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, NULL, &tok, &error) == 0);
-	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
+	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
-	while ((token = fts_tokenizer_next(tok, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
@@ -63,11 +63,11 @@
 	fts_tokenizer_register(fts_tokenizer_generic);
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, NULL, &tok, &error) == 0);
-	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
+	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
-	while ((token = fts_tokenizer_next(tok, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
@@ -91,6 +91,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer generic simple input one character at a time");
 	fts_tokenizer_register(fts_tokenizer_generic);
@@ -98,10 +99,10 @@
 	test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0);
 
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], 1) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], 1, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -136,11 +137,11 @@
 	fts_tokenizer_register(fts_tokenizer_generic);
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, tr29_settings, &tok, &error) == 0);
-	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
+	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
-	while ((token = fts_tokenizer_next(tok, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
@@ -173,11 +174,11 @@
 	fts_tokenizer_register(fts_tokenizer_generic);
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, tr29_settings, &tok, &error) == 0);
-	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
+	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
-	while ((token = fts_tokenizer_next(tok, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
 		test_assert(strcmp(token, *eopp) == 0);
 		eopp++;
 	}
@@ -204,11 +205,11 @@
 	fts_tokenizer_register(fts_tokenizer_generic);
 	tok_class = fts_tokenizer_find(FTS_TOKENIZER_GENERIC_NAME);
 	test_assert(fts_tokenizer_create(tok_class, NULL, tr29_settings, &tok, &error) == 0);
-	while ((token = fts_tokenizer_next(tok, input, sizeof(input)-1)) != NULL) {
+	while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) {
 		test_assert(null_strcmp(token, *eopp) == 0);
 		eopp++;
 	}
-	while ((token = fts_tokenizer_next(tok, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) {
 		test_assert(null_strcmp(token, *eopp) == 0);
 		eopp++;
 	}
@@ -232,6 +233,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer generic TR29 input one character at a time");
 	fts_tokenizer_register(fts_tokenizer_generic);
@@ -239,10 +241,10 @@
 	test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
 
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], 1) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], 1, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -272,6 +274,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer email address only, input one line at a time");
 	fts_tokenizer_register(fts_tokenizer_email_address);
@@ -279,10 +282,11 @@
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, settings, &tok, &error) == 0);
 
 	for (i = 0; i <= N_ELEMENTS(input);) {
-		token = i < N_ELEMENTS(input) ?
-			fts_tokenizer_next(tok, (unsigned char *)input[i], strlen(input[i])) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < N_ELEMENTS(input) ?
+			fts_tokenizer_next(tok, (unsigned char *)input[i],
+			                   strlen(input[i]), &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -310,16 +314,17 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer email address only, input one character at a time");
 	fts_tokenizer_register(fts_tokenizer_email_address);
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, settings, &tok, &error) == 0);
 
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], 1) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], 1, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -348,6 +353,7 @@
 	const char *token, *error;
 	const char *const settings[] = {"no_parent", "abc", NULL};
 	unsigned int i, step, step_max = 10;
+	int ret;
 
 	test_begin("fts tokenizer email address, input random length");
 	fts_tokenizer_register(fts_tokenizer_email_address);
@@ -355,10 +361,10 @@
 	                                 settings, &tok, &error) == 0);
 	step = rand() % step_max + 1;
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], step) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], step, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i += step;
 			step = rand() % step_max + 1;
 			step = I_MIN(step, sizeof(input) - i);
@@ -388,6 +394,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer email address + parent, input one character at a time");
 	fts_tokenizers_init();
@@ -396,10 +403,10 @@
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
 
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], 1) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], 1, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -433,6 +440,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i;
+	int ret;
 
 	test_begin("fts tokenizer email address + parent, input one line at a time");
 	fts_tokenizers_init();
@@ -441,10 +449,11 @@
 	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
 
 	for (i = 0; i <= N_ELEMENTS(input);) {
-		token = i < N_ELEMENTS(input) ?
-			fts_tokenizer_next(tok, (unsigned char *)input[i], strlen(input[i])) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < N_ELEMENTS(input) ?
+			fts_tokenizer_next(tok, (unsigned char *)input[i],
+			                   strlen(input[i]), &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i++;
 			continue;
 		}
@@ -474,6 +483,7 @@
 	const char * const *eopp = expected_output;
 	const char *token, *error;
 	unsigned int i, step, step_max = 10;
+	int ret;
 
 	test_begin("fts tokenizer email address + parent, input random length");
 	fts_tokenizer_register(fts_tokenizer_generic);
@@ -485,10 +495,10 @@
 	//srand(1424142100); /* had a bug */
 	step = rand() % step_max + 1;
 	for (i = 0; i <= sizeof(input)-1; ) {
-		token = i < sizeof(input)-1 ?
-			fts_tokenizer_next(tok, &input[i], step) :
-			fts_tokenizer_next(tok, NULL, 0);
-		if (token == NULL) {
+		ret = i < sizeof(input)-1 ?
+		      fts_tokenizer_next(tok, &input[i], step, &token) :
+		      fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
 			i += step;
 			step = rand() % step_max + 1;
 			step = I_MIN(step, sizeof(input) - i);
--- a/src/plugins/fts/fts-build-mail.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/plugins/fts/fts-build-mail.c	Sat May 09 11:05:04 2015 +0300
@@ -244,9 +244,10 @@
 	struct fts_tokenizer *tokenizer;
 	struct fts_filter *filter = ctx->cur_user_lang->filter;
 	const char *token;
+	int ret;
 
 	tokenizer = fts_user_get_index_tokenizer(ctx->update_ctx->backend->ns->user);
-	while ((token = fts_tokenizer_next(tokenizer, data, size)) != NULL) {
+	while ((ret = fts_tokenizer_next(tokenizer, data, size, &token)) > 0) {
 		if (filter != NULL) {
 			token = fts_filter_filter(filter, token);
 			if (token == NULL)
@@ -257,7 +258,7 @@
 						  strlen(token)) < 0)
 			return -1;
 	}
-	return 0;
+	return ret;
 }
 
 static int
--- a/src/plugins/fts/fts-search-args.c	Sat May 09 11:03:21 2015 +0300
+++ b/src/plugins/fts/fts-search-args.c	Sat May 09 11:05:04 2015 +0300
@@ -109,17 +109,17 @@
 	and_arg->next = orig_arg->next;
 	*argp = and_arg;
 
-	while ((token = fts_tokenizer_next(tokenizer,
-					   (const void *)orig_token,
-					   orig_token_len)) != NULL) {
+	while (fts_tokenizer_next(tokenizer,
+	                          (const void *)orig_token,
+	                          orig_token_len, &token) > 0) {
 		fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
 						       orig_arg, orig_token,
 						       token);
 	}
-	while ((token = fts_tokenizer_next(tokenizer, NULL, 0)) != NULL) {
+	while (fts_tokenizer_next(tokenizer, NULL, 0, &token) > 0) {
 		fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
-						       orig_arg, orig_token,
-						       token);
+		                                       orig_arg, orig_token,
+		                                       token);
 	}
 }