changeset 19933:159b933b617d

lib-fts: Lift helper function out of generic tokenizer.
author Teemu Huovila <teemu.huovila@dovecot.fi>
date Tue, 15 Mar 2016 10:47:20 +0200
parents 5920e652d82c
children 5d5b2fd1b95e
files src/lib-fts/Makefile.am src/lib-fts/fts-tokenizer-common.c src/lib-fts/fts-tokenizer-common.h src/lib-fts/fts-tokenizer-generic.c
diffstat 4 files changed, 32 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/Makefile.am	Wed Mar 16 10:55:01 2016 +1100
+++ b/src/lib-fts/Makefile.am	Tue Mar 15 10:47:20 2016 +0200
@@ -78,6 +78,7 @@
 	fts-library.c \
 	fts-tokenizer.c \
 	fts-tokenizer-address.c \
+	fts-tokenizer-common.c \
 	fts-tokenizer-generic.c \
 	$(ICU_SOURCES)
 
@@ -89,6 +90,7 @@
 	fts-language.h \
 	fts-library.h \
 	fts-tokenizer.h \
+	fts-tokenizer-common.h \
 	fts-tokenizer-private.h \
 	fts-tokenizer-generic-private.h
 
@@ -132,7 +134,7 @@
 endif
 
 test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo fts-tokenizer-common.lo ../lib-mail/libmail.la $(test_libs)
 test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
 
 check: check-am check-test
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-tokenizer-common.c	Tue Mar 15 10:47:20 2016 +0200
@@ -0,0 +1,22 @@
+#include "lib.h"
+#include "unichar.h"
+#include "fts-tokenizer-common.h"
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+					   size_t *len)
+{
+	size_t pos;
+	unsigned int char_bytes;
+
+	/* The token was cut at *len bytes (*len > 0, else *len-1 wraps). Find
+	   the last UTF-8 start byte; drop that character if it is partial. */
+	for (pos = *len-1; pos > 0; pos--) {
+		if (UTF8_IS_START_SEQ(data[pos]))
+			break;
+	}
+	char_bytes = uni_utf8_char_bytes(data[pos]);
+	if (char_bytes != *len-pos) {
+		i_assert(char_bytes > *len-pos); /* only a short tail is valid */
+		*len = pos; /* truncate before the partial character */
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-fts/fts-tokenizer-common.h	Tue Mar 15 10:47:20 2016 +0200
@@ -0,0 +1,6 @@
+#ifndef FTS_TOKENIZER_COMMON_H
+#define FTS_TOKENIZER_COMMON_H
+void
+fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
+                                           size_t *len); /* *len is read and may be reduced */
+#endif /* FTS_TOKENIZER_COMMON_H */
--- a/src/lib-fts/fts-tokenizer-generic.c	Wed Mar 16 10:55:01 2016 +1100
+++ b/src/lib-fts/fts-tokenizer-generic.c	Tue Mar 15 10:47:20 2016 +0200
@@ -8,6 +8,7 @@
 #include "fts-common.h"
 #include "fts-tokenizer-private.h"
 #include "fts-tokenizer-generic-private.h"
+#include "fts-tokenizer-common.h"
 #include "word-boundary-data.c"
 #include "word-break-data.c"
 
@@ -100,26 +101,6 @@
 	i_free(tok);
 }
 
-static void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
-					   size_t *len)
-{
-	size_t pos;
-	unsigned int char_bytes;
-
-	/* the token is truncated - make sure the last character
-	   exists entirely in the token */
-	for (pos = *len-1; pos > 0; pos--) {
-		if (UTF8_IS_START_SEQ(data[pos]))
-			break;
-	}
-	char_bytes = uni_utf8_char_bytes(data[pos]);
-	if (char_bytes != *len-pos) {
-		i_assert(char_bytes > *len-pos);
-		*len = pos;
-	}
-}
-
 static bool
 fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
                                            const char **token_r)