Mercurial > dovecot > core-2.2
changeset 12877:51ef5ffea07f
lib-storage: Added mail prefetching support for searching.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 04 Apr 2011 13:07:19 +0300 |
parents | b93d45b71332 |
children | 194855240d31 |
files | src/lib-storage/index/index-mail.h src/lib-storage/index/index-search.c src/lib-storage/mail-search.c src/lib-storage/mail-search.h |
diffstat | 4 files changed, 293 insertions(+), 154 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-storage/index/index-mail.h Mon Apr 04 12:57:49 2011 +0300 +++ b/src/lib-storage/index/index-mail.h Mon Apr 04 13:07:19 2011 +0300 @@ -88,6 +88,8 @@ enum mail_fetch_field cache_fetch_fields, dont_cache_fetch_fields; unsigned int dont_cache_field_idx; + buffer_t *search_results; + struct istream *stream, *filter_stream; struct tee_istream *tee_stream; struct message_size hdr_size, body_size;
--- a/src/lib-storage/index/index-search.c Mon Apr 04 12:57:49 2011 +0300 +++ b/src/lib-storage/index/index-search.c Mon Apr 04 13:07:19 2011 +0300 @@ -53,13 +53,15 @@ struct message_part *part; }; -static const enum message_header_parser_flags hdr_parser_flags = - MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; - static void search_parse_msgset_args(unsigned int messages_count, struct mail_search_arg *args, uint32_t *seq1_r, uint32_t *seq2_r); +static void search_none(struct mail_search_arg *arg ATTR_UNUSED, + struct search_body_context *ctx ATTR_UNUSED) +{ +} + static void search_init_arg(struct mail_search_arg *arg, struct index_search_context *ctx) { @@ -590,93 +592,104 @@ } static int search_arg_match_text(struct mail_search_arg *args, - struct index_search_context *ctx, int ret) + struct index_search_context *ctx) { - struct istream *input; + const enum message_header_parser_flags hdr_parser_flags = + MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; + struct index_mail *imail = (struct index_mail *)ctx->cur_mail; + struct istream *input = NULL; struct mailbox_header_lookup_ctx *headers_ctx; - struct mail_search_arg *arg; + struct search_header_context hdr_ctx; + struct search_body_context body_ctx; const char *const *headers; - bool have_headers, have_body; + bool have_headers, have_body, failed = FALSE; + int ret; /* first check what we need to use */ headers = mail_search_args_analyze(args, &have_headers, &have_body); if (!have_headers && !have_body) - return ret; + return -1; + + memset(&hdr_ctx, 0, sizeof(hdr_ctx)); + /* hdr_ctx.imail is different from imail for mails in + virtual mailboxes */ + hdr_ctx.imail = (struct index_mail *)mail_get_real_mail(ctx->cur_mail); + hdr_ctx.custom_header = TRUE; + hdr_ctx.args = args; + + headers_ctx = headers == NULL ? NULL : + mailbox_header_lookup_init(ctx->box, headers); + if (headers != NULL && + (!have_body || + ctx->cur_mail->lookup_abort == MAIL_LOOKUP_ABORT_NEVER)) { + /* try to look up the specified headers from cache */ + i_assert(*headers != NULL); + + if (mail_get_header_stream(ctx->cur_mail, headers_ctx, + &input) < 0) + failed = TRUE; + else { + message_parse_header(input, NULL, hdr_parser_flags, + search_header, &hdr_ctx); + } + input = NULL; + } else if (have_headers) { + /* we need to read the entire header */ + if (mail_get_stream(ctx->cur_mail, NULL, NULL, &input) < 0) + failed = TRUE; + else { + hdr_ctx.parse_headers = + index_mail_want_parse_headers(hdr_ctx.imail); + if (hdr_ctx.parse_headers) { + index_mail_parse_header_init(hdr_ctx.imail, + headers_ctx); + } + message_parse_header(input, NULL, hdr_parser_flags, + search_header, &hdr_ctx); + } + } + if (headers_ctx != NULL) + mailbox_header_lookup_unref(&headers_ctx); + + if (failed) { + /* opening mail failed. maybe because of lookup_abort. + update access_parts for prefetching */ + if (have_body) + imail->data.access_part |= READ_HDR | READ_BODY; + else + imail->data.access_part |= READ_HDR; + return -1; + } if (have_headers) { - struct search_header_context hdr_ctx; + /* see if the header search succeeded in finishing the search */ + ret = mail_search_args_foreach(args, search_none, NULL); + if (ret >= 0 || !have_body) + return ret; + } - if (have_body && - ctx->cur_mail->lookup_abort == MAIL_LOOKUP_ABORT_NEVER) { - /* just open the mail bypassing any caching, since - we're going to read through the body anyway */ - headers = NULL; - } + i_assert(have_body); - if (headers == NULL) { - headers_ctx = NULL; - if (mail_get_stream(ctx->cur_mail, NULL, NULL, &input) < 0) - return -1; - } else { - /* FIXME: do this once in init */ - i_assert(*headers != NULL); - headers_ctx = - mailbox_header_lookup_init(ctx->box, headers); - if (mail_get_header_stream(ctx->cur_mail, headers_ctx, - &input) < 0) { - mailbox_header_lookup_unref(&headers_ctx); - return -1; - } - } + if (ctx->cur_mail->lookup_abort != MAIL_LOOKUP_ABORT_NEVER) { + imail->data.access_part |= READ_HDR | READ_BODY; + return -1; + } - memset(&hdr_ctx, 0, sizeof(hdr_ctx)); - hdr_ctx.imail = (struct index_mail *)mail_get_real_mail(ctx->cur_mail); - hdr_ctx.custom_header = TRUE; - hdr_ctx.args = args; - hdr_ctx.parse_headers = headers == NULL && - index_mail_want_parse_headers(hdr_ctx.imail); - - if (hdr_ctx.parse_headers) - index_mail_parse_header_init(hdr_ctx.imail, headers_ctx); - message_parse_header(input, NULL, hdr_parser_flags, - search_header, &hdr_ctx); - if (headers_ctx != NULL) - mailbox_header_lookup_unref(&headers_ctx); - } else { + if (input == NULL) { + /* we didn't search headers. */ struct message_size hdr_size; if (mail_get_stream(ctx->cur_mail, &hdr_size, NULL, &input) < 0) return -1; - i_stream_seek(input, hdr_size.physical_size); } - if (have_body) { - struct search_body_context body_ctx; - - if (ctx->cur_mail->lookup_abort != MAIL_LOOKUP_ABORT_NEVER) - return -1; - - memset(&body_ctx, 0, sizeof(body_ctx)); - body_ctx.index_ctx = ctx; - body_ctx.input = input; - (void)mail_get_parts(ctx->cur_mail, &body_ctx.part); + memset(&body_ctx, 0, sizeof(body_ctx)); + body_ctx.index_ctx = ctx; + body_ctx.input = input; + (void)mail_get_parts(ctx->cur_mail, &body_ctx.part); - ret = mail_search_args_foreach(args, search_body, &body_ctx); - } else { - /* see if we have a decision */ - ret = 1; - arg = ctx->mail_ctx.args->args; - for (; arg != NULL; arg = arg->next) { - if (arg->result == 0) { - ret = 0; - break; - } - if (arg->result < 0) - ret = -1; - } - } - return ret; + return mail_search_args_foreach(args, search_body, &body_ctx); } static bool search_msgset_fix_limits(unsigned int messages_count, @@ -1164,71 +1177,32 @@ return ret; } -static bool search_match_next(struct index_search_context *ctx) +static unsigned long long search_mail_get_cost(struct mail_private *mail) { - static enum mail_lookup_abort cache_lookups[] = { - MAIL_LOOKUP_ABORT_NOT_IN_CACHE, - MAIL_LOOKUP_ABORT_READ_MAIL, - MAIL_LOOKUP_ABORT_NEVER - }; - unsigned int i; - int ret = -1; - - if (ctx->have_mailbox_args) { - /* check that the mailbox name matches. - this makes sense only with virtual mailboxes. */ - ret = mail_search_args_foreach(ctx->mail_ctx.args->args, - search_mailbox_arg, ctx); - if (ret >= 0) - return ret > 0; - } - - /* avoid doing extra work for as long as possible */ - for (i = 0; i < N_ELEMENTS(cache_lookups) && ret < 0; i++) { - ctx->cur_mail->lookup_abort = cache_lookups[i]; - ret = mail_search_args_foreach(ctx->mail_ctx.args->args, - search_cached_arg, ctx); - if (ret >= 0) - break; - - ret = search_arg_match_text(ctx->mail_ctx.args->args, ctx, ret); - if (ret >= 0) - break; - } - ctx->cur_mail->lookup_abort = MAIL_LOOKUP_ABORT_NEVER; - return ret > 0; + return mail->stats_open_lookup_count * SEARCH_COST_DENTRY + + mail->stats_stat_lookup_count * SEARCH_COST_DENTRY + + mail->stats_fstat_lookup_count * SEARCH_COST_ATTR + + mail->stats_cache_hit_count * SEARCH_COST_CACHE + + mail->stats_files_read_count * SEARCH_COST_FILES_READ + + (mail->stats_files_read_bytes/1024) * SEARCH_COST_KBYTE; } -static void index_storage_search_notify(struct mailbox *box, - struct index_search_context *ctx) +static int search_match_once(struct index_search_context *ctx) { - float percentage; - unsigned int msecs, secs; + struct mail_private *mail_private = + (struct mail_private *)ctx->cur_mail; + unsigned long long cost1, cost2; + int ret; - if (ctx->last_notify.tv_sec == 0) { - /* set the search time in here, in case a plugin - already spent some time indexing the mailbox */ - ctx->search_start_time = ioloop_timeval; - } else if (box->storage->callbacks.notify_ok != NULL && - !ctx->mail_ctx.progress_hidden) { - percentage = ctx->mail_ctx.progress_cur * 100.0 / - ctx->mail_ctx.progress_max; - msecs = timeval_diff_msecs(&ioloop_timeval, - &ctx->search_start_time); - secs = (msecs / (percentage / 100.0) - msecs) / 1000; + cost1 = search_mail_get_cost(mail_private); + ret = mail_search_args_foreach(ctx->mail_ctx.args->args, + search_cached_arg, ctx); + if (ret < 0) + ret = search_arg_match_text(ctx->mail_ctx.args->args, ctx); - T_BEGIN { - const char *text; - - text = t_strdup_printf("Searched %d%% of the mailbox, " - "ETA %d:%02d", (int)percentage, - secs/60, secs%60); - box->storage->callbacks. - notify_ok(box, text, - box->storage->callback_context); - } T_END; - } - ctx->last_notify = ioloop_timeval; + cost2 = search_mail_get_cost(mail_private); + ctx->cost += cost2 - cost1; + return ret; } static bool search_arg_is_static(struct mail_search_arg *arg) @@ -1293,14 +1267,82 @@ return FALSE; } -static unsigned long long search_mail_get_cost(struct mail_private *mail) +static void search_match_finish(struct index_search_context *ctx, int match) +{ + if (ctx->cur_mail->expunged) + ctx->mail_ctx.seen_lost_data = TRUE; + + if (match == 0 && + search_has_static_nonmatches(ctx->mail_ctx.args->args)) { + /* if there are saved search results remember + that this message never matches */ + mailbox_search_results_never(&ctx->mail_ctx, + ctx->cur_mail->uid); + } +} + +static int search_match_next(struct index_search_context *ctx) { - return mail->stats_open_lookup_count * SEARCH_COST_DENTRY + - mail->stats_stat_lookup_count * SEARCH_COST_DENTRY + - mail->stats_fstat_lookup_count * SEARCH_COST_ATTR + - mail->stats_cache_hit_count * SEARCH_COST_CACHE + - mail->stats_files_read_count * SEARCH_COST_FILES_READ + - (mail->stats_files_read_bytes/1024) * SEARCH_COST_KBYTE; + static enum mail_lookup_abort cache_lookups[] = { + MAIL_LOOKUP_ABORT_NOT_IN_CACHE, + MAIL_LOOKUP_ABORT_READ_MAIL, + MAIL_LOOKUP_ABORT_NEVER + }; + unsigned int i, n = N_ELEMENTS(cache_lookups); + int ret = -1; + + if (ctx->have_mailbox_args) { + /* check that the mailbox name matches. + this makes sense only with virtual mailboxes. */ + ret = mail_search_args_foreach(ctx->mail_ctx.args->args, + search_mailbox_arg, ctx); + } + + /* avoid doing extra work for as long as possible */ + if (ctx->max_mails > 1) { + /* we're doing prefetching. if we have to read the mail, + do a prefetch first and the final search later */ + n--; + } + for (i = 0; i < n && ret < 0; i++) { + ctx->cur_mail->lookup_abort = cache_lookups[i]; + ret = search_match_once(ctx); + } + ctx->cur_mail->lookup_abort = MAIL_LOOKUP_ABORT_NEVER; + search_match_finish(ctx, ret); + return ret; +} + +static void index_storage_search_notify(struct mailbox *box, + struct index_search_context *ctx) +{ + float percentage; + unsigned int msecs, secs; + + if (ctx->last_notify.tv_sec == 0) { + /* set the search time in here, in case a plugin + already spent some time indexing the mailbox */ + ctx->search_start_time = ioloop_timeval; + } else if (box->storage->callbacks.notify_ok != NULL && + !ctx->mail_ctx.progress_hidden) { + percentage = ctx->mail_ctx.progress_cur * 100.0 / + ctx->mail_ctx.progress_max; + msecs = timeval_diff_msecs(&ioloop_timeval, + &ctx->search_start_time); + secs = (msecs / (percentage / 100.0) - msecs) / 1000; + + T_BEGIN { + const char *text; + + text = t_strdup_printf("Searched %d%% of the mailbox, " + "ETA %d:%02d", (int)percentage, + secs/60, secs%60); + box->storage->callbacks. + notify_ok(box, text, + box->storage->callback_context); + } T_END; + } + ctx->last_notify = ioloop_timeval; } static bool search_would_block(struct index_search_context *ctx) @@ -1353,9 +1395,8 @@ { struct mail_search_context *_ctx = &ctx->mail_ctx; struct mailbox *box = _ctx->transaction->box; - struct mail_private *mail_private = (struct mail_private *)mail; - unsigned long long cost1, cost2; - bool match; + struct index_mail *imail = (struct index_mail *)mail; + int match; if (search_would_block(ctx)) { /* this lookup is useful when a large number of @@ -1367,7 +1408,6 @@ SEARCH_NOTIFY_INTERVAL_SECS) index_storage_search_notify(box, ctx); - cost1 = search_mail_get_cost(mail_private); while (box->v.search_next_update_seq(_ctx)) { mail_set_seq(mail, _ctx->seq); @@ -1377,22 +1417,19 @@ } T_END; ctx->cur_mail = NULL; - if (mail->expunged) - _ctx->seen_lost_data = TRUE; - - if (!match && search_has_static_nonmatches(_ctx->args->args)) { - /* if there are saved search results remember - that this message never matches */ - mailbox_search_results_never(_ctx, mail->uid); + i_assert(imail->data.search_results == NULL); + if (match < 0) { + /* result isn't known yet, do a prefetch and + finish later */ + imail->data.search_results = + buffer_create_dynamic(imail->data_pool, 64); + mail_search_args_result_serialize(_ctx->args, + imail->data.search_results); } - cost2 = search_mail_get_cost(mail_private); - ctx->cost += cost2 - cost1; - cost1 = cost2; - mail_search_args_reset(_ctx->args->args, FALSE); - if (match) + if (match != 0) return 1; if (search_would_block(ctx)) return 0; @@ -1426,8 +1463,8 @@ return mail; } -static int search_more(struct index_search_context *ctx, - struct mail **mail_r) +static int search_more_with_prefetching(struct index_search_context *ctx, + struct mail **mail_r) { struct mail *mail, *const *mails; unsigned int count; @@ -1471,6 +1508,42 @@ return 1; } +static bool search_finish_prefetch(struct index_search_context *ctx, + struct index_mail *imail) +{ + int ret; + + i_assert(imail->mail.mail.lookup_abort == MAIL_LOOKUP_ABORT_NEVER); + + ctx->cur_mail = &imail->mail.mail; + mail_search_args_result_deserialize(ctx->mail_ctx.args, + imail->data.search_results->data, + imail->data.search_results->used); + ret = search_match_once(ctx); + search_match_finish(ctx, ret); + ctx->cur_mail = NULL; + return ret > 0; +} + +static int search_more(struct index_search_context *ctx, + struct mail **mail_r) +{ + struct index_mail *imail; + int ret; + + while ((ret = search_more_with_prefetching(ctx, mail_r)) > 0) { + imail = (struct index_mail *)*mail_r; + if (imail->data.search_results == NULL) + break; + + /* searching wasn't finished yet */ + if (search_finish_prefetch(ctx, imail)) + break; + /* search finished as non-match */ + } + return ret; +} + bool index_storage_search_next_nonblock(struct mail_search_context *_ctx, struct mail **mail_r, bool *tryagain_r) {
--- a/src/lib-storage/mail-search.c Mon Apr 04 12:57:49 2011 +0300 +++ b/src/lib-storage/mail-search.c Mon Apr 04 13:07:19 2011 +0300 @@ -895,3 +895,60 @@ return mail_search_arg_equals(args1->args, args2->args); } + +static void +mail_search_args_result_serialize_arg(const struct mail_search_arg *arg, + buffer_t *dest) +{ + const struct mail_search_arg *subarg; + + buffer_append_c(dest, arg->result < 0 ? 0xff : arg->result); + + switch (arg->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + subarg = arg->value.subargs; + for (; subarg != NULL; subarg = subarg->next) + mail_search_args_result_serialize_arg(subarg, dest); + default: + break; + } +} + +void mail_search_args_result_serialize(const struct mail_search_args *args, + buffer_t *dest) +{ + mail_search_args_result_serialize_arg(args->args, dest); +} + +static void +mail_search_args_result_deserialize_arg(struct mail_search_arg *arg, + const unsigned char **data, + size_t *size) +{ + struct mail_search_arg *subarg; + + i_assert(*size > 0); + arg->result = **data == 0xff ? -1 : **data; + *data += 1; *size -= 1; + + switch (arg->type) { + case SEARCH_OR: + case SEARCH_SUB: + case SEARCH_INTHREAD: + subarg = arg->value.subargs; + for (; subarg != NULL; subarg = subarg->next) { + mail_search_args_result_deserialize_arg(subarg, + data, size); + } + default: + break; + } +} + +void mail_search_args_result_deserialize(struct mail_search_args *args, + const unsigned char *data, size_t size) +{ + mail_search_args_result_deserialize_arg(args->args, &data, &size); +}
--- a/src/lib-storage/mail-search.h Mon Apr 04 12:57:49 2011 +0300 +++ b/src/lib-storage/mail-search.h Mon Apr 04 13:07:19 2011 +0300 @@ -178,4 +178,11 @@ guaranteed to have not=FALSE. */ void mail_search_args_simplify(struct mail_search_args *args); +/* Serialization for search args' results. */ +void mail_search_args_result_serialize(const struct mail_search_args *args, + buffer_t *dest); +void mail_search_args_result_deserialize(struct mail_search_args *args, + const unsigned char *data, + size_t size); + #endif