Mercurial > dovecot > core-2.2
changeset 19919:126a81a431d2
fts: Added "doveadm fts tokenize" command.
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Thu, 03 Mar 2016 16:10:51 +0200 |
parents | e7fb4e09d051 |
children | c1b70b76f9bb |
files | src/plugins/fts/doveadm-fts.c |
diffstat | 1 files changed, 157 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts/doveadm-fts.c Thu Mar 03 11:53:05 2016 +0200 +++ b/src/plugins/fts/doveadm-fts.c Thu Mar 03 16:10:51 2016 +0200 @@ -6,14 +6,25 @@ #include "mail-namespace.h" #include "mail-search.h" #include "mailbox-list-iter.h" +#include "fts-tokenizer.h" +#include "fts-filter.h" +#include "fts-language.h" #include "fts-storage.h" #include "fts-search-args.h" +#include "fts-user.h" +#include "doveadm-print.h" #include "doveadm-mail.h" #include "doveadm-mailbox-list-iter.h" #include "doveadm-fts.h" const char *doveadm_fts_plugin_version = DOVECOT_ABI_VERSION; +struct fts_tokenize_cmd_context { + struct doveadm_mail_cmd_context ctx; + const char *language; + const char *tokens; +}; + static int cmd_search_box(struct doveadm_mail_cmd_context *ctx, const struct mailbox_info *info) @@ -156,6 +167,142 @@ } static int +cmd_fts_tokenize_run(struct doveadm_mail_cmd_context *_ctx, + struct mail_user *user) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + struct mail_namespace *ns = mail_namespace_find_inbox(user->namespaces); + struct fts_backend *backend; + struct fts_user_language *user_lang; + const struct fts_language *lang = NULL; + int ret, ret2; + bool final = FALSE; + + backend = fts_list_backend(ns->list); + if (backend == NULL) { + i_error("fts not enabled for INBOX"); + _ctx->exit_code = EX_CONFIG; + return -1; + } + + if (ctx->language == NULL) { + struct fts_language_list *lang_list = + fts_user_get_language_list(user); + enum fts_language_result result; + + result = fts_language_detect(lang_list, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &lang); + if (lang == NULL) + lang = fts_language_list_get_first(lang_list); + switch (result) { + case FTS_LANGUAGE_RESULT_SHORT: + i_warning("Text too short, can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_UNKNOWN: + i_warning("Can't detect its language - assuming %s", lang->name); + break; + case FTS_LANGUAGE_RESULT_OK: + break; + case FTS_LANGUAGE_RESULT_ERROR: + i_error("Language detection library initialization failed"); + _ctx->exit_code = EX_CONFIG; + return -1; + default: + i_unreached(); + } + } else { + lang = fts_language_find(ctx->language); + if (lang == NULL) { + i_error("Unknown language: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + } + user_lang = fts_user_language_find(user, lang); + if (user_lang == NULL) { + i_error("Language not enabled for user: %s", ctx->language); + _ctx->exit_code = EX_USAGE; + return -1; + } + + fts_tokenizer_reset(user_lang->index_tokenizer); + for (;;) { + const char *token, *error; + + if (!final) { + ret = fts_tokenizer_next(user_lang->index_tokenizer, + (const unsigned char *)ctx->tokens, strlen(ctx->tokens), + &token, &error); + } else { + ret = fts_tokenizer_final(user_lang->index_tokenizer, + &token, &error); + } + if (ret < 0) + break; + if (ret > 0 && user_lang->filter != NULL) { + ret2 = fts_filter_filter(user_lang->filter, &token, &error); + if (ret2 > 0) + doveadm_print(token); + else if (ret2 < 0) + i_error("Couldn't create indexable tokens: %s", error); + } + if (ret == 0) { + if (final) + break; + final = TRUE; + } + } + return 0; +} + +static void +cmd_fts_tokenize_init(struct doveadm_mail_cmd_context *_ctx, + const char *const args[]) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + if (args[0] == NULL) + doveadm_mail_help_name("fts tokenize"); + + ctx->tokens = p_strdup(_ctx->pool, t_strarray_join(args, " ")); + + doveadm_print_init(DOVEADM_PRINT_TYPE_FLOW); + doveadm_print_header("token", "token", DOVEADM_PRINT_HEADER_FLAG_HIDE_TITLE); +} + +static bool +cmd_fts_tokenize_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c) +{ + struct fts_tokenize_cmd_context *ctx = + (struct fts_tokenize_cmd_context *)_ctx; + + switch (c) { + case 'l': + ctx->language = p_strdup(_ctx->pool, optarg); + break; + default: + return FALSE; + } + return TRUE; +} + +static struct doveadm_mail_cmd_context * +cmd_fts_tokenize_alloc(void) +{ + struct fts_tokenize_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct fts_tokenize_cmd_context); + ctx->ctx.v.run = cmd_fts_tokenize_run; + ctx->ctx.v.init = cmd_fts_tokenize_init; + ctx->ctx.v.parse_arg = cmd_fts_tokenize_parse_arg; + ctx->ctx.getopt_args = "l"; + return &ctx->ctx; +} + +static int fts_namespace_find(struct mail_user *user, const char *ns_prefix, struct mail_namespace **ns_r) { @@ -279,6 +426,16 @@ DOVEADM_CMD_PARAMS_END }, { + .name = "fts tokenize", + .mail_cmd = cmd_fts_tokenize_alloc, + .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "<text>", +DOVEADM_CMD_PARAMS_START +DOVEADM_CMD_MAIL_COMMON +DOVEADM_CMD_PARAM('l', "language", CMD_PARAM_STR, 0) +DOVEADM_CMD_PARAM('\0', "text", CMD_PARAM_ARRAY, CMD_PARAM_FLAG_POSITIONAL) +DOVEADM_CMD_PARAMS_END +}, +{ .name = "fts optimize", .mail_cmd = cmd_fts_optimize_alloc, .usage = DOVEADM_CMD_MAIL_USAGE_PREFIX "[<namespace>]",