Mercurial > dovecot > core-2.2
changeset 19933:159b933b617d
lib-fts: Lift helper function out of generic tokenizer.
author | Teemu Huovila <teemu.huovila@dovecot.fi> |
---|---|
date | Tue, 15 Mar 2016 10:47:20 +0200 |
parents | 5920e652d82c |
children | 5d5b2fd1b95e |
files | src/lib-fts/Makefile.am src/lib-fts/fts-tokenizer-common.c src/lib-fts/fts-tokenizer-common.h src/lib-fts/fts-tokenizer-generic.c |
diffstat | 4 files changed, 32 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/Makefile.am	Wed Mar 16 10:55:01 2016 +1100
+++ b/src/lib-fts/Makefile.am	Tue Mar 15 10:47:20 2016 +0200
@@ -78,6 +78,7 @@
 	fts-library.c \
 	fts-tokenizer.c \
 	fts-tokenizer-address.c \
+	fts-tokenizer-common.c \
 	fts-tokenizer-generic.c \
 	$(ICU_SOURCES)
 
@@ -89,6 +90,7 @@
 	fts-language.h \
 	fts-library.h \
 	fts-tokenizer.h \
+	fts-tokenizer-common.h \
 	fts-tokenizer-private.h \
 	fts-tokenizer-generic-private.h
 
@@ -132,7 +134,7 @@
 endif
 
 test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo fts-tokenizer-common.lo ../lib-mail/libmail.la $(test_libs)
 test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
 
 check: check-am check-test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-fts/fts-tokenizer-common.c Tue Mar 15 10:47:20 2016 +0200 @@ -0,0 +1,22 @@ +#include "lib.h" +#include "unichar.h" +#include "fts-tokenizer-common.h" +void +fts_tokenizer_delete_trailing_partial_char(const unsigned char *data, + size_t *len) +{ + size_t pos; + unsigned int char_bytes; + + /* the token is truncated - make sure the last character + exists entirely in the token */ + for (pos = *len-1; pos > 0; pos--) { + if (UTF8_IS_START_SEQ(data[pos])) + break; + } + char_bytes = uni_utf8_char_bytes(data[pos]); + if (char_bytes != *len-pos) { + i_assert(char_bytes > *len-pos); + *len = pos; + } +}
/* src/lib-fts/fts-tokenizer-common.h (new file added by this changeset) */
#ifndef FTS_TOKENIZER_COMMON_H
#define FTS_TOKENIZER_COMMON_H

/* Used on a truncated token: if the last UTF-8 character in the *len
   bytes at data is only partially present, *len is lowered so that the
   partial character is excluded. data itself is not modified.
   Callers are expected to pass a non-empty token. */
void fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
						size_t *len);

#endif
--- a/src/lib-fts/fts-tokenizer-generic.c	Wed Mar 16 10:55:01 2016 +1100
+++ b/src/lib-fts/fts-tokenizer-generic.c	Tue Mar 15 10:47:20 2016 +0200
@@ -8,6 +8,7 @@
 #include "fts-common.h"
 #include "fts-tokenizer-private.h"
 #include "fts-tokenizer-generic-private.h"
+#include "fts-tokenizer-common.h"
 #include "word-boundary-data.c"
 #include "word-break-data.c"
 
@@ -100,26 +101,6 @@
 	i_free(tok);
 }
 
-static void
-fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
-					   size_t *len)
-{
-	size_t pos;
-	unsigned int char_bytes;
-
-	/* the token is truncated - make sure the last character
-	   exists entirely in the token */
-	for (pos = *len-1; pos > 0; pos--) {
-		if (UTF8_IS_START_SEQ(data[pos]))
-			break;
-	}
-	char_bytes = uni_utf8_char_bytes(data[pos]);
-	if (char_bytes != *len-pos) {
-		i_assert(char_bytes > *len-pos);
-		*len = pos;
-	}
-}
-
 static bool
 fts_tokenizer_generic_simple_current_token(struct generic_fts_tokenizer *tok,
 					   const char **token_r)