Mercurial > dovecot > original-hg > dovecot-2.1
changeset 4946:460d589403d0 HEAD
Make indexing non-blocking. Optimize also header searches.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Thu, 21 Dec 2006 15:16:32 +0200 |
parents | cc9832968e8e |
children | 8be1ce6cfeec |
files | src/plugins/fts/fts-storage.c |
diffstat | 1 files changed, 272 insertions(+), 106 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts/fts-storage.c Thu Dec 21 13:29:25 2006 +0200 +++ b/src/plugins/fts/fts-storage.c Thu Dec 21 15:16:32 2006 +0200 @@ -17,6 +17,8 @@ *((void **)array_idx_modifiable(&(obj)->module_contexts, \ fts_storage_module_id)) +#define FTS_SEARCH_NONBLOCK_COUNT 10 + struct fts_mailbox { struct mailbox_vfuncs super; struct fts_backend *backend_exact; @@ -30,10 +32,25 @@ ARRAY_TYPE(seq_range) result; unsigned int result_pos; + struct mail_search_arg *args, *best_arg; struct fts_backend *backend; + struct fts_storage_build_context *build_ctx; + unsigned int locked:1; }; +struct fts_storage_build_context { + struct mail_search_context *search_ctx; + struct mail_search_seqset seqset; + struct mail_search_arg search_arg; + struct mail *mail; + struct fts_backend_build_context *build; + + uint32_t uid; + string_t *headers; + bool save_part; +}; + struct fts_transaction_context { bool expunges; }; @@ -83,13 +100,6 @@ return 0; } -struct fts_storage_build_context { - struct fts_backend_build_context *build; - uint32_t uid; - string_t *headers; - bool save_part; -}; - static int fts_build_mail_flush(struct fts_storage_build_context *ctx) { if (str_len(ctx->headers) == 0) @@ -140,8 +150,7 @@ return fts_build_mail_flush(ctx); } -static int -fts_build_mail(struct fts_storage_build_context *ctx, struct mail *mail) +static int fts_build_mail(struct fts_storage_build_context *ctx) { struct istream *input; struct message_parser_ctx *parser; @@ -150,9 +159,9 @@ struct message_part *prev_part, *skip_part; int ret; - ctx->uid = mail->uid; + ctx->uid = ctx->mail->uid; - input = mail_get_stream(mail, NULL, NULL); + input = mail_get_stream(ctx->mail, NULL, NULL); if (input == NULL) return -1; @@ -196,7 +205,7 @@ break; } } else { - if (fts_backend_build_more(ctx->build, mail->uid, + if (fts_backend_build_more(ctx->build, ctx->mail->uid, block.data, block.size) < 0) { ret = -1; @@ -209,16 +218,14 @@ return ret; } -static int fts_build_new(struct fts_backend *backend, - struct mailbox_transaction_context *t) +static int fts_build_init(struct fts_search_context *fctx, + struct mailbox_transaction_context *t) { - struct fts_storage_build_context ctx; - struct mail_search_context *search_ctx; + struct fts_backend *backend = fctx->backend; + struct fts_storage_build_context *ctx; + struct fts_backend_build_context *build; struct mail_search_seqset seqset; - struct mail_search_arg search_arg; - struct mail *mail; uint32_t last_uid, last_uid_locked; - int ret = 0; if (fts_backend_get_last_uid(backend, &last_uid) < 0) return -1; @@ -232,8 +239,7 @@ return 0; } - memset(&ctx, 0, sizeof(ctx)); - ctx.build = fts_backend_build_init(backend, &last_uid_locked); + build = fts_backend_build_init(backend, &last_uid_locked); if (last_uid != last_uid_locked) { /* changed, need to get again the sequences */ i_assert(last_uid < last_uid_locked); @@ -241,47 +247,219 @@ last_uid = last_uid_locked; if (mailbox_get_uids(t->box, last_uid+1, (uint32_t)-1, &seqset.seq1, &seqset.seq2) < 0) { - (void)fts_backend_build_deinit(ctx.build); + (void)fts_backend_build_deinit(build); return -1; } if (seqset.seq1 == 0) { /* no new messages */ - (void)fts_backend_build_deinit(ctx.build); + (void)fts_backend_build_deinit(build); return 0; } } - memset(&search_arg, 0, sizeof(search_arg)); - search_arg.type = SEARCH_SEQSET; - search_arg.value.seqset = &seqset; + + ctx = i_new(struct fts_storage_build_context, 1); + ctx->build = build; + ctx->seqset = seqset; + ctx->search_arg.type = SEARCH_SEQSET; + ctx->search_arg.value.seqset = &ctx->seqset; + + ctx->headers = str_new(default_pool, 512); + ctx->mail = mail_alloc(t, 0, NULL); + ctx->search_ctx = mailbox_search_init(t, NULL, &ctx->search_arg, NULL); + + fctx->build_ctx = ctx; + return 0; +} + +static int fts_build_deinit(struct fts_storage_build_context *ctx) +{ + int ret = 0; - ctx.headers = str_new(default_pool, 512); - mail = mail_alloc(t, 0, NULL); - search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL); - while (mailbox_search_next(search_ctx, mail) > 0) { + if (mailbox_search_deinit(&ctx->search_ctx) < 0) + ret = -1; + mail_free(&ctx->mail); + + if (fts_backend_build_deinit(ctx->build) < 0) + ret = -1; + str_free(&ctx->headers); + i_free(ctx); + return ret; +} + +static int fts_build_more(struct fts_storage_build_context *ctx) +{ + unsigned int count = 0; + int ret; + + while (mailbox_search_next(ctx->search_ctx, ctx->mail) > 0) { t_push(); - ret = fts_build_mail(&ctx, mail); + ret = fts_build_mail(ctx); t_pop(); if (ret < 0) - break; + return -1; + + if (++count == FTS_SEARCH_NONBLOCK_COUNT) + return 0; } - if (mailbox_search_deinit(&search_ctx) < 0) - ret = -1; - mail_free(&mail); + return 1; +} + +static void fts_search_filter_args(struct fts_search_context *fctx, + struct mail_search_arg *args, + ARRAY_TYPE(seq_range) *uid_result) +{ + const char *key; + + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_BODY_FAST: + case SEARCH_TEXT_FAST: + if ((fctx->backend->flags & + FTS_BACKEND_FLAG_EXACT_LOOKUPS) == 0) + break; + /* fall through */ + case SEARCH_BODY: + case SEARCH_TEXT: + case SEARCH_HEADER: + if (args == fctx->best_arg) { + /* already handled this one */ + break; + } - if (fts_backend_build_deinit(ctx.build) < 0) - ret = -1; - str_free(&ctx.headers); - return ret; + key = args->value.str; + if (*key == '\0') { + i_assert(args->type == SEARCH_HEADER); + + /* we're only checking the existence + of the header. */ + key = args->hdr_field_name; + } + + if (fts_backend_filter(fctx->backend, key, + uid_result) < 0) { + /* failed, but we already have limited + the search, so just ignore this */ + break; + } + if (args->type != SEARCH_HEADER && + (fctx->backend->flags & + FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) { + args->match_always = TRUE; + args->result = 1; + } + break; + case SEARCH_OR: + case SEARCH_SUB: + fts_search_filter_args(fctx, args->value.subargs, + uid_result); + break; + default: + break; + } + } } -#define SEARCH_ARG_IS_OPTIMIZABLE(backend, arg) \ - ((arg)->type == SEARCH_BODY_FAST || (arg)->type == SEARCH_TEXT_FAST || \ - ((backend) != NULL && \ - ((backend)->flags & FTS_BACKEND_FLAG_EXACT_LOOKUPS) != 0 && \ - ((arg)->type == SEARCH_BODY || (arg)->type == SEARCH_TEXT))) +static void fts_search_init(struct mailbox *box, + struct fts_search_context *fctx) +{ + struct fts_backend *backend = fctx->backend; + const char *key; + ARRAY_TYPE(seq_range) uid_result; + + if (fts_backend_lock(backend) <= 0) + return; + fctx->locked = TRUE; + + key = fctx->best_arg->value.str; + if (*key == '\0') { + i_assert(fctx->best_arg->type == SEARCH_HEADER); + + /* we're only checking the existence + of the header. */ + key = fctx->best_arg->hdr_field_name; + } + + i_array_init(&uid_result, 64); + if (fts_backend_lookup(backend, key, &uid_result) < 0) { + /* failed, fallback to reading everything */ + array_free(&uid_result); + return; + } + + if ((backend->flags & FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) { + fctx->best_arg->match_always = TRUE; + fctx->best_arg->result = 1; + } + + fts_search_filter_args(fctx, fctx->args, &uid_result); + + (void)uid_range_to_seq(box, &uid_result, &fctx->result); + array_free(&uid_result); +} + +static bool arg_is_better(const struct mail_search_arg *new_arg, + const struct mail_search_arg *old_arg) +{ + if (old_arg == NULL) + return TRUE; + /* prefer not to use headers. they have a larger possibility of + having lots of identical strings */ + if (old_arg->type == SEARCH_HEADER) + return TRUE; + else if (new_arg->type == SEARCH_HEADER) + return FALSE; + + return strlen(new_arg->value.str) > strlen(old_arg->value.str); +} + +static void fts_search_args_check(struct mail_search_arg *args, + bool *have_fast_r, bool *have_exact_r, + struct mail_search_arg **best_fast_arg, + struct mail_search_arg **best_exact_arg) +{ + for (; args != NULL; args = args->next) { + switch (args->type) { + case SEARCH_BODY_FAST: + case SEARCH_TEXT_FAST: + if (*args->value.str == '\0') { + /* this matches everything */ + args->match_always = TRUE; + args->result = 1; + break; + } + if (arg_is_better(args, *best_fast_arg)) { + *best_fast_arg = args; + *have_fast_r = TRUE; + } + break; + case SEARCH_BODY: + case SEARCH_TEXT: + if (*args->value.str == '\0') { + /* this matches everything */ + args->match_always = TRUE; + args->result = 1; + break; + } + case SEARCH_HEADER: + if (arg_is_better(args, *best_exact_arg)) { + *best_exact_arg = args; + *have_exact_r = TRUE; + } + break; + case SEARCH_OR: + case SEARCH_SUB: + fts_search_args_check(args->value.subargs, + have_fast_r, have_exact_r, + best_fast_arg, best_exact_arg); + break; + default: + break; + } + } +} static struct mail_search_context * fts_mailbox_search_init(struct mailbox_transaction_context *t, @@ -291,86 +469,68 @@ struct fts_mailbox *fbox = FTS_CONTEXT(t->box); struct mail_search_context *ctx; struct fts_search_context *fctx; - struct fts_backend *backend = NULL; - struct mail_search_arg *tmp; - ARRAY_TYPE(seq_range) uid_result; + struct mail_search_arg *best_fast_arg, *best_exact_arg; + bool have_fast, have_exact; ctx = fbox->super.search_init(t, charset, args, sort_program); fctx = i_new(struct fts_search_context, 1); + fctx->args = args; array_idx_set(&ctx->module_contexts, fts_storage_module_id, &fctx); if (fbox->backend_exact == NULL && fbox->backend_fast == NULL) return ctx; - /* FIXME: handle AND/OR. Maybe also header lookups? */ - - /* first check if we can use backend_fast */ - for (tmp = args; tmp != NULL; tmp = tmp->next) { - if (args->type == SEARCH_BODY_FAST || - args->type == SEARCH_TEXT_FAST) - break; + have_fast = have_exact = FALSE; + best_fast_arg = best_exact_arg = NULL; + fts_search_args_check(args, &have_fast, &have_exact, + &best_fast_arg, &best_exact_arg); + if (have_fast && fbox->backend_fast != NULL) { + /* use fast backend whenever possible */ + fctx->backend = fbox->backend_fast; + fctx->best_arg = best_fast_arg; + } else if (have_exact || have_fast) { + fctx->backend = fbox->backend_exact; + fctx->best_arg = arg_is_better(best_exact_arg, best_fast_arg) ? + best_exact_arg : best_fast_arg; } - if (tmp != NULL) { - /* yes */ - backend = fbox->backend_fast; - } else if (fbox->backend_exact != NULL) { - /* nope. what about backend_exact? */ - for (tmp = args; tmp != NULL; tmp = tmp->next) { - if (args->type == SEARCH_BODY || - args->type == SEARCH_TEXT) { - backend = fbox->backend_exact; - break; - } + + if (fctx->backend != NULL) { + if (fts_build_init(fctx, t) < 0) + fctx->backend = NULL; + else if (fctx->build_ctx == NULL) { + /* the index was up to date */ + fts_search_init(t->box, fctx); } } - args = tmp; + + return ctx; +} - if (backend == NULL) - return ctx; - - /* update the backend */ - if (fts_build_new(backend, t) < 0) - return ctx; +static int fts_mailbox_search_next_nonblock(struct mail_search_context *ctx, + struct mail *mail, bool *tryagain_r) +{ + struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box); + struct fts_search_context *fctx = FTS_CONTEXT(ctx); + int ret; - if (fts_backend_lock(backend) <= 0) - return ctx; - fctx->backend = backend; - fctx->locked = TRUE; + if (fctx->build_ctx != NULL) { + /* still building the index */ + ret = fts_build_more(fctx->build_ctx); + if (ret == 0) { + *tryagain_r = TRUE; + return 0; + } - i_array_init(&uid_result, 64); - if (fts_backend_lookup(backend, args->value.str, &uid_result) < 0) { - /* failed, fallback to reading everything */ - array_free(&uid_result); - } + /* finished / error */ + fts_build_deinit(fctx->build_ctx); + fctx->build_ctx = NULL; - if ((backend->flags & FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) { - args->match_always = TRUE; - args->result = 1; + if (ret == 0) + fts_search_init(ctx->transaction->box, fctx); } - args = args->next; - while (args != NULL) { - if (SEARCH_ARG_IS_OPTIMIZABLE(backend, args)) { - if ((backend->flags & - FTS_BACKEND_FLAG_DEFINITE_LOOKUPS) != 0) { - args->match_always = TRUE; - args->result = 1; - } - if (fts_backend_filter(backend, args->value.str, - &uid_result) < 0) { - /* failed, but we already have limited - the search, so just ignore this */ - break; - } - } - args = args->next; - } + return fbox->super.search_next_nonblock(ctx, mail, tryagain_r); - if (array_is_created(&uid_result)) { - (void)uid_range_to_seq(t->box, &uid_result, &fctx->result); - array_free(&uid_result); - } - return ctx; } static int fts_mailbox_search_next_update_seq(struct mail_search_context *ctx) @@ -418,6 +578,11 @@ struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box); struct fts_search_context *fctx = FTS_CONTEXT(ctx); + if (fctx->build_ctx != NULL) { + /* the search was cancelled */ + fts_build_deinit(fctx->build_ctx); + } + if (fctx->locked) fts_backend_unlock(fctx->backend); @@ -574,6 +739,7 @@ fbox->super = box->v; box->v.close = fts_mailbox_close; box->v.search_init = fts_mailbox_search_init; + box->v.search_next_nonblock = fts_mailbox_search_next_nonblock; box->v.search_next_update_seq = fts_mailbox_search_next_update_seq; box->v.search_deinit = fts_mailbox_search_deinit; box->v.mail_alloc = fts_mail_alloc;