Mercurial > dovecot > core-2.2
view src/lib-fts/fts-filter-stopwords.c @ 22711:25d4771ad0fd
lib-storage: mailbox_list_index - indentation cleanup
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Thu, 14 Dec 2017 02:10:27 +0200 |
parents | 2e2563132d5f |
children | cb108f786fb4 |
line wrap: on
line source
/* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "istream.h"
#include "strfuncs.h"
#include "hash.h"
#include "unichar.h"
#include "fts-language.h"
#include "fts-filter-private.h"

/* Path template of a stopword list:
   <stopwords_dir>/stopwords_<language name>.txt */
#define STOPWORDS_FILE_FORMAT "%s/stopwords_%s.txt"

/* A word ends at the first of these characters; the rest of the line is
   ignored. A line starting with one of them yields no word at all. */
#define STOPWORDS_CUTCHARS "|#\t "
/* A line is skipped if any of these characters occurs inside its word. */
#define STOPWORDS_DISALLOWED_CHARS "/\\<>.,\":()\t\n\r"

struct fts_filter_stopwords {
	/* Generic filter; must stay first, because the filter pointer is
	   cast back to this struct. */
	struct fts_filter filter;
	/* Private copy of the language; only the name is filled in. */
	struct fts_language *lang;
	/* Pool that this struct, the language copy and the words live in. */
	pool_t pool;
	/* Set of stopwords (key == value). Created on the first filter call. */
	HASH_TABLE(const char *, const char *) stopwords;
	/* Directory the stopword list is read from. */
	const char *stopwords_dir;
};

/* Read the stopword file of the filter's language and insert every accepted
   word into filter->stopwords, which must already be created.

   Returns 0 on success and -1 if reading the file failed, in which case
   *error_r is set. An empty result is not an error; it only logs a warning. */
static int fts_filter_stopwords_read_list(struct fts_filter_stopwords *filter,
					  const char **error_r)
{
	struct istream *input;
	const char *line, *word, *path;
	int ret = 0;
	size_t len;

	path = t_strdup_printf(STOPWORDS_FILE_FORMAT,
			       filter->stopwords_dir, filter->lang->name);

	input = i_stream_create_file(path, IO_BLOCK_SIZE);
	while ((line = i_stream_read_next_line(input)) != NULL) {
		/* The word is everything up to the first cut character. */
		len = strcspn(line, STOPWORDS_CUTCHARS);
		if (len == 0)
			continue;
		/* Reject the line if a disallowed character is in the word. */
		if (strcspn(line, STOPWORDS_DISALLOWED_CHARS) < len)
			continue;
		word = p_strndup(filter->pool, line, len);
		hash_table_insert(filter->stopwords, word, word);
	}

	if (input->stream_errno != 0) {
		*error_r = t_strdup_printf(
			"Failed to read stopword list %s: %s",
			path, i_stream_get_error(input));
		ret = -1;
	}

	if (ret == 0 && hash_table_count(filter->stopwords) == 0)
		i_warning("Stopwords list \"%s\" seems empty. "
			  "Is the file correctly formatted?", path);

	i_stream_destroy(&input);
	return ret;
}

/* Free the stopword table, if it was ever created, and release the pool. */
static void fts_filter_stopwords_destroy(struct fts_filter *filter)
{
	struct fts_filter_stopwords *sp =
		(struct fts_filter_stopwords *)filter;

	if (hash_table_is_created(sp->stopwords))
		hash_table_destroy(&sp->stopwords);
	pool_unref(&sp->pool);
}

/* Create a stopwords filter for the given language.

   settings is a NULL-terminated list of key, value pairs. The only accepted
   key is "stopwords_dir"; without it DATADIR"/stopwords" is used. The word
   list itself is not read here, but on the first filter call.

   Returns 0 and sets *filter_r on success. Returns -1 and sets *error_r if
   an unknown setting is given. */
static int
fts_filter_stopwords_create(const struct fts_language *lang,
			    const char *const *settings,
			    struct fts_filter **filter_r,
			    const char **error_r)
{
	struct fts_filter_stopwords *sp;
	pool_t pp;
	const char *dir = NULL;
	unsigned int i;

	for (i = 0; settings[i] != NULL; i += 2) {
		const char *key = settings[i], *value = settings[i+1];

		if (strcmp(key, "stopwords_dir") == 0) {
			dir = value;
		} else {
			*error_r = t_strdup_printf("Unknown setting: %s", key);
			return -1;
		}
	}

	pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_stopwords",
				   sizeof(struct fts_filter));
	sp = p_new(pp, struct fts_filter_stopwords, 1);
	sp->filter = *fts_filter_stopwords;
	sp->pool = pp;
	/* Keep an own copy of the language name instead of pointing at the
	   caller's struct. */
	sp->lang = p_malloc(sp->pool, sizeof(struct fts_language));
	sp->lang->name = p_strdup(sp->pool, lang->name);
	if (dir != NULL)
		sp->stopwords_dir = p_strdup(pp, dir);
	else
		sp->stopwords_dir = DATADIR"/stopwords";
	*filter_r = &sp->filter;
	return 0;
}

/* Check whether *token is a stopword, loading the word list on first use.

   Returns 1 if the token is not in the list, 0 if it is, and -1 with
   *error_r set if the list could not be read. */
static int
fts_filter_stopwords_filter(struct fts_filter *filter, const char **token,
			    const char **error_r)
{
	struct fts_filter_stopwords *sp =
		(struct fts_filter_stopwords *) filter;

	if (!hash_table_is_created(sp->stopwords)) {
		hash_table_create(&sp->stopwords, sp->pool, 0,
				  str_hash, strcmp);
		if (fts_filter_stopwords_read_list(sp, error_r) < 0) {
			/* Don't keep the empty or partially filled table.
			   Otherwise later calls would see it as loaded and
			   silently let every token through. */
			hash_table_destroy(&sp->stopwords);
			return -1;
		}
	}
	return hash_table_lookup(sp->stopwords, *token) == NULL ? 1 : 0;
}

const struct fts_filter fts_filter_stopwords_real = {
	.class_name = "stopwords",
	.v = {
		fts_filter_stopwords_create,
		fts_filter_stopwords_filter,
		fts_filter_stopwords_destroy
	}
};

const struct fts_filter *fts_filter_stopwords = &fts_filter_stopwords_real;