Mercurial > dovecot > core-2.2
changeset 18584:75b4b312ea09
lib-fts: Added "lowercase" filter.
For now it handles only ASCII characters, but that's enough for our use.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 09 May 2015 14:26:42 +0300 |
parents | 1963690280b7 |
children | fcc20dce3c83 |
files | src/lib-fts/Makefile.am src/lib-fts/fts-filter-lowercase.c src/lib-fts/fts-filter.c src/lib-fts/fts-filter.h src/lib-fts/test-fts-filter.c |
diffstat | 5 files changed, 97 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/Makefile.am Sat May 09 14:09:37 2015 +0300 +++ b/src/lib-fts/Makefile.am Sat May 09 14:26:42 2015 +0300 @@ -61,6 +61,7 @@ libfts_la_SOURCES = \ fts-filter.c \ + fts-filter-lowercase.c \ fts-filter-normalizer-icu.c \ fts-filter-stopwords.c \ fts-filter-stemmer-snowball.c \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-fts/fts-filter-lowercase.c Sat May 09 14:26:42 2015 +0300 @@ -0,0 +1,61 @@ +/* Copyright (c) 2015 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "fts-filter.h" +#include "fts-filter-private.h" +#include "fts-language.h" + +static bool +fts_filter_lowercase_supports(const struct fts_language *lang ATTR_UNUSED) +{ + return TRUE; +} + +static void +fts_filter_lowercase_destroy(struct fts_filter *filter) +{ + i_free(filter); +} + +static int +fts_filter_lowercase_create(const struct fts_language *lang ATTR_UNUSED, + const char *const *settings, + struct fts_filter **filter_r, + const char **error_r) +{ + struct fts_filter *filter; + + if (settings[0] != NULL) { + *error_r = t_strdup_printf("Unknown setting: %s", settings[0]); + return -1; + } + filter = i_new(struct fts_filter, 1); + *filter = *fts_filter_lowercase; + + *filter_r = filter; + return 0; +} + +static int +fts_filter_lowercase_filter(struct fts_filter *_filter ATTR_UNUSED, + const char **token, + const char **error_r ATTR_UNUSED) +{ + *token = t_str_lcase(*token); + return 1; +} + +static const struct fts_filter_vfuncs normalizer_filter_vfuncs = { + fts_filter_lowercase_supports, + fts_filter_lowercase_create, + fts_filter_lowercase_filter, + fts_filter_lowercase_destroy +}; + +static const struct fts_filter fts_filter_lowercase_real = { + .class_name = LOWERCASE_FILTER_NAME, + .v = &normalizer_filter_vfuncs +}; + +const struct fts_filter *fts_filter_lowercase = &fts_filter_lowercase_real;
--- a/src/lib-fts/fts-filter.c Sat May 09 14:09:37 2015 +0300 +++ b/src/lib-fts/fts-filter.c Sat May 09 14:26:42 2015 +0300 @@ -15,6 +15,7 @@ fts_filter_register(fts_filter_stopwords); fts_filter_register(fts_filter_stemmer_snowball); fts_filter_register(fts_filter_normalizer_icu); + fts_filter_register(fts_filter_lowercase); } void fts_filters_deinit(void)
--- a/src/lib-fts/fts-filter.h Sat May 09 14:09:37 2015 +0300 +++ b/src/lib-fts/fts-filter.h Sat May 09 14:26:42 2015 +0300 @@ -33,6 +33,10 @@ extern const struct fts_filter *fts_filter_normalizer_icu; #define ICU_NORMALIZER_FILTER_NAME "normalizer-icu" +/* Lowercases the input. Currently only ASCII data is lowercased. */ +extern const struct fts_filter *fts_filter_lowercase; +#define LOWERCASE_FILTER_NAME "lowercase" + /* Register all built-in filters. */ void fts_filters_init(void); void fts_filters_deinit(void);
--- a/src/lib-fts/test-fts-filter.c Sat May 09 14:09:37 2015 +0300 +++ b/src/lib-fts/test-fts-filter.c Sat May 09 14:26:42 2015 +0300 @@ -11,6 +11,35 @@ static const char *const stopword_settings[] = {"stopwords_dir", TEST_STOPWORDS_DIR, NULL}; static struct fts_language english_language = { .name = "en" }; +static void test_fts_filter_lowercase(void) +{ + struct { + const char *input; + const char *output; + } tests[] = { + { "foo", "foo" }, + { "FOO", "foo" }, + { "fOo", "foo" } + }; + const struct fts_filter *filter_class; + struct fts_filter *filter; + const char *error; + const char *token; + unsigned int i; + + test_begin("fts filter lowercase"); + filter_class = fts_filter_find(LOWERCASE_FILTER_NAME); + test_assert(fts_filter_create(filter_class, NULL, &english_language, NULL, &filter, &error) == 0); + + for (i = 0; i < N_ELEMENTS(tests); i++) { + token = tests[i].input; + test_assert_idx(fts_filter_filter(filter, &token, &error) > 0 && + strcmp(token, tests[i].output) == 0, 0); + } + fts_filter_unref(&filter); + test_end(); +} + static void test_fts_filter_stopwords_eng(void) { const struct fts_filter *filter_class; @@ -521,6 +550,7 @@ int main(void) { static void (*test_functions[])(void) = { + test_fts_filter_lowercase, test_fts_filter_stopwords_eng, test_fts_filter_stopwords_fin, test_fts_filter_stopwords_fra,