changeset 18602:7542e3be6721

lib-fts: Added fts_tokenizer_final() as a convenience wrapper.
author Timo Sirainen <tss@iki.fi>
date Sat, 09 May 2015 18:00:58 +0300
parents dd04199a689f
children e4b62ba0fb5a
files src/lib-fts/fts-tokenizer.c src/lib-fts/fts-tokenizer.h src/plugins/fts/fts-search-args.c
diffstat 3 files changed, 13 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer.c	Sat May 09 17:34:59 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.c	Sat May 09 18:00:58 2015 +0300
@@ -196,3 +196,8 @@
 		i_unreached();
 	}
 }
+
+int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r)
+{
+	return fts_tokenizer_next(tok, NULL, 0, token_r); /* NULL data, size=0: flush out the final token(s) */
+}
--- a/src/lib-fts/fts-tokenizer.h	Sat May 09 17:34:59 2015 +0300
+++ b/src/lib-fts/fts-tokenizer.h	Sat May 09 18:00:58 2015 +0300
@@ -65,22 +65,21 @@
 void fts_tokenizer_reset(struct fts_tokenizer *tok);
 
 /*
-   Returns 1 if token was returned, 0 if input was non-blocking and
-   more data is needed, -1 if EOF/error.
-
-   Returns the next token into *token_r, or NULL if more data is
-   needed for the next token.
+   Returns 1 if *token_r was returned, 0 if more data is needed, -1 on error.
 
    This function should be called with the same data+size until it
-   returns 0. When the input is finished, this function should be
-   still be called with size=0 to flush out the final token(s).
+   returns 0. When the input is finished, fts_tokenizer_final() should be
+   called until it returns 0 to flush out the final token(s).
 
    data must contain only valid complete UTF-8 sequences, but otherwise it
-   may be broken into however small pieces. */
+   may be broken into however small pieces. (Input to this function typically
+   comes from message-decoder, which returns only complete UTF-8 sequences.) */
 
 int fts_tokenizer_next(struct fts_tokenizer *tok,
 		       const unsigned char *data, size_t size,
 		       const char **token_r);
+/* Returns same as fts_tokenizer_next(). */
+int fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r);
 
 const char *fts_tokenizer_name(const struct fts_tokenizer *tok);
 
--- a/src/plugins/fts/fts-search-args.c	Sat May 09 17:34:59 2015 +0300
+++ b/src/plugins/fts/fts-search-args.c	Sat May 09 18:00:58 2015 +0300
@@ -125,7 +125,7 @@
 							   token) < 0)
 			return -1;
 	}
-	while (fts_tokenizer_next(tokenizer, NULL, 0, &token) > 0) {
+	while (fts_tokenizer_final(tokenizer, &token) > 0) {
 		if (fts_backend_dovecot_expand_lang_tokens(languages, pool, and_arg,
 							   orig_arg, orig_token,
 							   token) < 0)