Mercurial > dovecot > original-hg > dovecot-1.2
view src/lib-storage/index/index-search.c @ 2327:7d02e2a7672d HEAD
Header caching redesigned. New design allows caching decisions per field, so
they can be divided to temporary/permanent. Cached headers are now always
returned in original order, old code didn't guarantee it. Some other caching
changes. (still missing code to store changes in caching decisions)
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 18 Jul 2004 05:25:06 +0300 |
parents | 5beb0c20b6e8 |
children | 98f6057f27a1 |
line wrap: on
line source
/* Copyright (C) 2002 Timo Sirainen */

#include "lib.h"
#include "istream.h"
#include "str.h"
#include "message-address.h"
#include "message-date.h"
#include "message-body-search.h"
#include "message-header-search.h"
#include "message-parser.h"
#include "imap-date.h"
#include "index-storage.h"
#include "index-mail.h"
#include "mail-search.h"

#include <stdlib.h>
#include <ctype.h>

#define TXT_UNKNOWN_CHARSET "[BADCHARSET] Unknown charset"
#define TXT_INVALID_SEARCH_KEY "Invalid search key"

/* State of one search, created by index_storage_search_init() and
   destroyed by index_storage_search_deinit(). */
struct index_search_context {
	struct mail_search_context mail_ctx;
	struct mail_index_view *view;
	struct index_mailbox *ibox;
	char *charset; /* i_strdup()ed in init, freed in deinit */
	struct mail_search_arg *args;

	/* sequence range still to be examined; seq1 > seq2 means done */
	uint32_t seq1, seq2;

	struct index_mail imail;
	struct mail *mail; /* points to imail.mail */

	pool_t hdr_pool; /* created lazily by search_header_context() */
	const char *error; /* one of the TXT_* messages, or NULL */

	int failed;
};

/* Context handed to the header callbacks while parsing one message. */
struct search_header_context {
	struct index_search_context *index_context;
	struct mail_search_arg *args;

	struct message_header_line *hdr;

	unsigned int custom_header:1;
	unsigned int threading:1;
};

/* Context handed to search_body() for one message. */
struct search_body_context {
	struct index_search_context *index_ctx;
	struct istream *input;
	const struct message_part *part;
};

/* Returns TRUE if seq is inside any of the ranges in the linked list. */
static int seqset_contains(struct mail_search_seqset *set, uint32_t seq)
{
	while (set != NULL) {
		if (seq >= set->seq1 && seq <= set->seq2)
			return TRUE;
		set = set->next;
	}

	return FALSE;
}

/* Parses a string of decimal digits. Returns 0 if any non-digit character
   is found. No overflow checking is done. */
static uoff_t str_to_uoff_t(const char *str)
{
	uoff_t num;

	num = 0;
	while (*str != '\0') {
		if (*str < '0' || *str > '9')
			return 0;

		num = num*10 + (*str - '0');
		str++;
	}

	return num;
}

/* Keyword matching. The actual lookup is still disabled (see FIXME below),
   so this currently returns FALSE for every record. */
static int search_keyword(struct mail_index *index,
			  const struct mail_index_record *rec,
			  const char *value)
{
	const char **keywords;
	int i;

	for (i = 0; i < INDEX_KEYWORDS_BYTE_COUNT; i++) {
		if (rec->keywords[i] != 0)
			break;
	}

	if (i == INDEX_KEYWORDS_BYTE_COUNT)
		return FALSE; /* no keywords set */

	/*FIXME:keywords = mail_keywords_list_get(index->keywords);
	for (i = 0; i < MAIL_KEYWORDS_COUNT; i++) {
		if (keywords[i] != NULL &&
		    strcasecmp(keywords[i], value) == 0) {
			return rec->msg_flags &
				(1 << (MAIL_KEYWORD_1_BIT+i));
		}
	}*/

	return FALSE;
}

/* Matches arguments that can be answered from the index record alone.
   Returns >0 = matched, 0 = not matched, -1 = unknown */
static int search_arg_match_index(struct index_mailbox *ibox,
				  struct index_mail *imail,
				  enum mail_search_arg_type type,
				  const char *value)
{
	const struct mail_index_record *rec = imail->data.rec;
	const struct mail_full_flags *full_flags;

	switch (type) {
	case SEARCH_ALL:
		return 1;

	/* flags */
	case SEARCH_ANSWERED:
		return rec->flags & MAIL_ANSWERED;
	case SEARCH_DELETED:
		return rec->flags & MAIL_DELETED;
	case SEARCH_DRAFT:
		return rec->flags & MAIL_DRAFT;
	case SEARCH_FLAGGED:
		return rec->flags & MAIL_FLAGGED;
	case SEARCH_SEEN:
		return rec->flags & MAIL_SEEN;
	case SEARCH_RECENT:
		/* recent flag is read through the mail's get_flags() instead
		   of the index record */
		full_flags = imail->mail.get_flags(&imail->mail);
		return full_flags->flags & MAIL_RECENT;
	case SEARCH_KEYWORD:
		return search_keyword(ibox->index, rec, value);

	default:
		return -1;
	}
}

/* mail_search_args_foreach() callback: resolves sequence sets and
   index-only arguments. Arguments of unknown type are left untouched. */
static void search_index_arg(struct mail_search_arg *arg, void *context)
{
	struct index_search_context *ctx = context;
	int found;

	if (arg->type == SEARCH_SEQSET) {
		found = seqset_contains(arg->value.seqset, ctx->mail->seq);
		ARG_SET_RESULT(arg, found);
		return;
	}

	if (ctx->imail.data.rec == NULL) {
		/* expunged message */
		ARG_SET_RESULT(arg, 0);
		return;
	}

	switch (search_arg_match_index(ctx->ibox, &ctx->imail,
				       arg->type, arg->value.str)) {
	case -1:
		/* unknown */
		break;
	case 0:
		ARG_SET_RESULT(arg, 0);
		break;
	default:
		ARG_SET_RESULT(arg, 1);
		break;
	}
}

/* Matches arguments using the received date, sent date and size getters
   of the mail. A getter returning -1 is treated as "unknown".
   Returns >0 = matched, 0 = not matched, -1 = unknown */
static int search_arg_match_cached(struct index_search_context *ctx,
				   enum mail_search_arg_type type,
				   const char *value)
{
	time_t date, search_time;
	uoff_t virtual_size, search_size;
	int timezone_offset;

	switch (type) {
	/* internal dates */
	case SEARCH_BEFORE:
	case SEARCH_ON:
	case SEARCH_SINCE:
		date = ctx->mail->get_received_date(ctx->mail);
		if (date == (time_t)-1)
			return -1;

		if (!imap_parse_date(value, &search_time))
			return 0;

		switch (type) {
		case SEARCH_BEFORE:
			return date < search_time;
		case SEARCH_ON:
			return date >= search_time &&
				date < search_time + 3600*24;
		case SEARCH_SINCE:
			return date >= search_time;
		default:
			/* unreachable: the outer case labels admit only the
			   three types handled above. If it were reached,
			   control would fall into the next outer case. */
			break;
		}

	/* sent dates */
	case SEARCH_SENTBEFORE:
	case SEARCH_SENTON:
	case SEARCH_SENTSINCE:
		/* NOTE: RFC-3501 specifies that timezone is ignored
		   in searches. date is returned as UTC, so change it. */
		date = ctx->mail->get_date(ctx->mail, &timezone_offset);
		if (date == (time_t)-1)
			return -1;
		date += timezone_offset * 60;

		if (!imap_parse_date(value, &search_time))
			return 0;

		switch (type) {
		case SEARCH_SENTBEFORE:
			return date < search_time;
		case SEARCH_SENTON:
			return date >= search_time &&
				date < search_time + 3600*24;
		case SEARCH_SENTSINCE:
			return date >= search_time;
		default:
			/* unreachable, see above */
			break;
		}

	/* sizes */
	case SEARCH_SMALLER:
	case SEARCH_LARGER:
		virtual_size = ctx->mail->get_size(ctx->mail);
		if (virtual_size == (uoff_t)-1)
			return -1;

		search_size = str_to_uoff_t(value);
		if (type == SEARCH_SMALLER)
			return virtual_size < search_size;
		else
			return virtual_size > search_size;

	default:
		return -1;
	}
}

/* mail_search_args_foreach() callback wrapping search_arg_match_cached().
   Arguments with unknown result are left untouched. */
static void search_cached_arg(struct mail_search_arg *arg, void *context)
{
	struct index_search_context *ctx = context;

	switch (search_arg_match_cached(ctx, arg->type,
					arg->value.str)) {
	case -1:
		/* unknown */
		break;
	case 0:
		ARG_SET_RESULT(arg, 0);
		break;
	default:
		ARG_SET_RESULT(arg, 1);
		break;
	}
}

/* Compares a raw Date header value against the search date.
   Returns 0 if the header value is missing or either date fails to parse.
   type must be one of SEARCH_SENTBEFORE / SEARCH_SENTON / SEARCH_SENTSINCE. */
static int search_sent(enum mail_search_arg_type type, const char *search_value,
		       const unsigned char *sent_value, size_t sent_value_len)
{
	time_t search_time, sent_time;
	int timezone_offset;

	if (sent_value == NULL)
		return 0;

	if (!imap_parse_date(search_value, &search_time))
		return 0;

	/* NOTE: RFC-3501 specifies that timezone is ignored
	   in searches. sent_time is returned as UTC, so change it. */
	if (!message_date_parse(sent_value, sent_value_len,
				&sent_time, &timezone_offset))
		return 0;
	sent_time += timezone_offset * 60;

	switch (type) {
	case SEARCH_SENTBEFORE:
		return sent_time < search_time;
	case SEARCH_SENTON:
		return sent_time >= search_time &&
			sent_time < search_time + 3600*24;
	case SEARCH_SENTSINCE:
		return sent_time >= search_time;
	default:
		i_unreached();
	}
}

/* Returns the header search context stored in arg->context, creating it
   on first use (allocated from ctx->hdr_pool) and resetting it on later
   uses. On creation failure NULL is returned and ctx->error is set. */
static struct header_search_context *
search_header_context(struct index_search_context *ctx,
		      struct mail_search_arg *arg)
{
	int unknown_charset;

	if (arg->context != NULL) {
		message_header_search_reset(arg->context);
		return arg->context;
	}

	if (ctx->hdr_pool == NULL) {
		ctx->hdr_pool =
			pool_alloconly_create("message_header_search", 8192);
	}

	arg->context = message_header_search_init(ctx->hdr_pool,
						  arg->value.str,
						  ctx->charset,
						  &unknown_charset);
	if (arg->context == NULL) {
		ctx->error = unknown_charset ?
			TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
	}

	return arg->context;
}

/* mail_search_args_foreach() callback: matches one search argument
   against the header line currently stored in ctx->hdr. */
static void search_header_arg(struct mail_search_arg *arg, void *context)
{
	struct search_header_context *ctx = context;
	struct header_search_context *hdr_search_ctx;
	int ret;

	/* first check that the field name matches to argument. */
	switch (arg->type) {
	case SEARCH_SENTBEFORE:
	case SEARCH_SENTON:
	case SEARCH_SENTSINCE:
		/* date is handled differently than others */
		if (strcasecmp(ctx->hdr->name, "Date") == 0) {
			if (ctx->hdr->continues) {
				/* wait until the whole value is available */
				ctx->hdr->use_full_value = TRUE;
				return;
			}
			ret = search_sent(arg->type, arg->value.str,
					  ctx->hdr->full_value,
					  ctx->hdr->full_value_len);
			ARG_SET_RESULT(arg, ret);
		}
		return;

	case SEARCH_HEADER:
	case SEARCH_HEADER_ADDRESS:
		ctx->custom_header = TRUE;

		if (strcasecmp(ctx->hdr->name, arg->hdr_field_name) != 0)
			return;
		/* fall through */
	case SEARCH_TEXT:
		/* TEXT goes through all headers */
		ctx->custom_header = TRUE;
		break;
	default:
		return;
	}

	if (arg->value.str[0] == '\0') {
		/* we're just testing existence of the field. always matches. */
		ret = 1;
	} else {
		if (ctx->hdr->continues) {
			/* wait until the whole value is available */
			ctx->hdr->use_full_value = TRUE;
			return;
		}

		t_push();

		hdr_search_ctx = search_header_context(ctx->index_context, arg);
		if (hdr_search_ctx == NULL)
			ret = 0;
		else if (arg->type == SEARCH_HEADER_ADDRESS) {
			/* we have to match against normalized address */
			struct message_address *addr;
			string_t *str;

			addr = message_address_parse(pool_datastack_create(),
						     ctx->hdr->full_value,
						     ctx->hdr->full_value_len,
						     0);
			str = t_str_new(ctx->hdr->value_len);
			message_address_write(str, addr);
			ret = message_header_search(str_data(str), str_len(str),
						    hdr_search_ctx) ? 1 : 0;
		} else {
			ret = message_header_search(ctx->hdr->full_value,
						    ctx->hdr->full_value_len,
						    hdr_search_ctx) ? 1 : 0;
		}
		t_pop();
	}

	if (ret == 1 ||
	    (arg->type != SEARCH_TEXT && arg->type != SEARCH_HEADER)) {
		/* set only when we definitely know if it's a match */
		ARG_SET_RESULT(arg, ret);
	}
}

/* mail_search_args_foreach() callback run at the end of headers. */
static void search_header_unmatch(struct mail_search_arg *arg,
				  void *context __attr_unused__)
{
	switch (arg->type) {
	case SEARCH_SENTBEFORE:
	case SEARCH_SENTON:
	case SEARCH_SENTSINCE:
		if (arg->not) {
			/* date header not found, so we match only for
			   NOT searches */
			ARG_SET_RESULT(arg, 0);
		}
		break;
	case SEARCH_HEADER:
	case SEARCH_HEADER_ADDRESS:
		ARG_SET_RESULT(arg, 0);
		break;
	default:
		break;
	}
}

/* message_parse_header() callback: called for each header line and once
   with hdr == NULL at the end of headers. */
static void search_header(struct message_part *part,
			  struct message_header_line *hdr, void *context)
{
	struct search_header_context *ctx = context;

	if (hdr == NULL) {
		/* end of headers, mark all unknown SEARCH_HEADERs unmatched */
		mail_search_args_foreach(ctx->args, search_header_unmatch, ctx);
		return;
	}

	if (hdr->eoh)
		return;

	index_mail_parse_header(NULL, hdr, &ctx->index_context->imail);

	if (ctx->custom_header || strcasecmp(hdr->name, "Date") == 0) {
		ctx->hdr = hdr;

		/* search_header_arg() sets this again if any argument still
		   needs to see every header */
		ctx->custom_header = FALSE;
		mail_search_args_foreach(ctx->args, search_header_arg, ctx);
	}
}

/* mail_search_args_foreach() callback: matches TEXT and BODY arguments
   against the message body. Does nothing once an error has been set. */
static void search_body(struct mail_search_arg *arg, void *context)
{
	struct search_body_context *ctx = context;
	int ret, unknown_charset;

	if (ctx->index_ctx->error != NULL)
		return;

	if (arg->type == SEARCH_TEXT || arg->type == SEARCH_BODY) {
		i_stream_seek(ctx->input, 0);
		ret = message_body_search(arg->value.str,
					  ctx->index_ctx->charset,
					  &unknown_charset, ctx->input,
					  ctx->part, arg->type == SEARCH_TEXT);

		if (ret < 0) {
			ctx->index_ctx->error = unknown_charset ?
				TXT_UNKNOWN_CHARSET : TXT_INVALID_SEARCH_KEY;
		}

		ARG_SET_RESULT(arg, ret > 0);
	}
}

/* Matches the arguments that need the message headers and/or body.
   Returns FALSE if the message stream can't be opened, TRUE otherwise;
   the match results themselves are stored in the arguments. */
static int search_arg_match_text(struct mail_search_arg *args,
				 struct index_search_context *ctx)
{
	struct istream *input;
	struct mailbox_header_lookup_ctx *headers_ctx;
	const char *const *headers;
	int have_headers, have_body;

	/* first check what we need to use */
	headers = mail_search_args_analyze(args, &have_headers, &have_body);
	if (!have_headers && !have_body)
		return TRUE;

	if (have_headers) {
		struct search_header_context hdr_ctx;

		/* the body search needs the full stream anyway */
		if (have_body)
			headers = NULL;

		if (headers == NULL) {
			headers_ctx = NULL;
			input = ctx->mail->get_stream(ctx->mail, NULL, NULL);
			if (input == NULL)
				return FALSE;
		} else {
			/* FIXME: do this once in init */
			headers_ctx =
				mailbox_header_lookup_init(&ctx->ibox->box,
							   headers);
			input = ctx->mail->get_headers(ctx->mail, headers_ctx);
			if (input == NULL) {
				mailbox_header_lookup_deinit(headers_ctx);
				return FALSE;
			}
		}

		memset(&hdr_ctx, 0, sizeof(hdr_ctx));
		hdr_ctx.index_context = ctx;
		hdr_ctx.custom_header = TRUE;
		hdr_ctx.args = args;

		index_mail_parse_header_init(&ctx->imail, headers_ctx);
		message_parse_header(NULL, input, NULL,
				     search_header, &hdr_ctx);
		if (headers_ctx != NULL)
			mailbox_header_lookup_deinit(headers_ctx);
	} else {
		struct message_size hdr_size;

		input = ctx->mail->get_stream(ctx->mail, &hdr_size, NULL);
		if (input == NULL)
			return FALSE;

		/* skip the headers */
		i_stream_seek(input, hdr_size.physical_size);
	}

	if (have_body) {
		struct search_body_context body_ctx;

		memset(&body_ctx, 0, sizeof(body_ctx));
		body_ctx.index_ctx = ctx;
		body_ctx.input = input;
		body_ctx.part = ctx->mail->get_parts(ctx->mail);

		mail_search_args_foreach(args, search_body, &body_ctx);
	}
	return TRUE;
}

/* Replaces (uint32_t)-1 in the sequence set with the last message
   sequence, validates the ranges and expands *seq1_r..*seq2_r to cover
   them. *seq1_r == 0 means the range hasn't been set yet.
   Returns -1 and sets a storage syntax error for an invalid set. */
static int search_msgset_fix(struct index_mailbox *ibox,
			     const struct mail_index_header *hdr,
			     struct mail_search_seqset *set,
			     uint32_t *seq1_r, uint32_t *seq2_r)
{
	for (; set != NULL; set = set->next) {
		if (set->seq1 == (uint32_t)-1)
			set->seq1 = hdr->messages_count;
		if (set->seq2 == (uint32_t)-1)
			set->seq2 = hdr->messages_count;

		if (set->seq1 == 0 || set->seq2 == 0 ||
		    set->seq1 > hdr->messages_count ||
		    set->seq2 > hdr->messages_count) {
			mail_storage_set_syntax_error(ibox->box.storage,
						      "Invalid messageset");
			return -1;
		}

		if (*seq1_r > set->seq1 || *seq1_r == 0)
			*seq1_r = set->seq1;
		if (*seq2_r < set->seq2)
			*seq2_r = set->seq2;
	}
	return 0;
}

/* Walks the search arguments, fixing up all sequence sets and computing
   the sequence range that has to be examined. Both outputs are reset to 0
   on entry (also on recursive calls); they stay 0 if nothing limited the
   range. Returns -1 if a sequence set is invalid. */
static int search_parse_msgset_args(struct index_mailbox *ibox,
				    const struct mail_index_header *hdr,
				    struct mail_search_arg *args,
				    uint32_t *seq1_r, uint32_t *seq2_r)
{
	*seq1_r = *seq2_r = 0;

	for (; args != NULL; args = args->next) {
		if (args->type == SEARCH_SUB) {
			if (search_parse_msgset_args(ibox, hdr,
						     args->value.subargs,
						     seq1_r, seq2_r) < 0)
				return -1;
		} else if (args->type == SEARCH_OR) {
			/* We still have to fix potential seqsets though */
			if (search_parse_msgset_args(ibox, hdr,
						     args->value.subargs,
						     seq1_r, seq2_r) < 0)
				return -1;

			/* FIXME: in cases like "SEEN OR 5 7" we shouldn't
			   limit the range, but in cases like "1 OR 5 7" we
			   should expand the range. A bit tricky, we'll
			   just go through everything now to make it work
			   right.

			   This has to be done after the recursive call,
			   because the call resets the range on entry and
			   would otherwise leave it limited to the seqsets
			   found inside the OR. */
			*seq1_r = 1;
			*seq2_r = hdr->messages_count;
		} else if (args->type == SEARCH_SEQSET) {
			if (search_msgset_fix(ibox, hdr, args->value.seqset,
					      seq1_r, seq2_r) < 0)
				return -1;
		} else if (args->type == SEARCH_ALL) {
			/* go through everything. don't stop, have to fix
			   seqsets. */
			*seq1_r = 1;
			*seq2_r = hdr->messages_count;
		}
	}
	return 0;
}

/* Raises *first_seq to the first sequence found for UIDs at or above
   uid_lowwater. uid_lowwater == 0 is a no-op.
   Returns -1 if the index lookup fails. */
static int search_limit_lowwater(struct index_search_context *ctx,
				 uint32_t uid_lowwater, uint32_t *first_seq)
{
	uint32_t seq1, seq2;

	if (uid_lowwater == 0)
		return 0;

	if (mail_index_lookup_uid_range(ctx->view, uid_lowwater, (uint32_t)-1,
					&seq1, &seq2) < 0) {
		mail_storage_set_index_error(ctx->ibox);
		return -1;
	}

	if (*first_seq < seq1)
		*first_seq = seq1;
	return 0;
}

/* Uses the seen/deleted counters in the index header to shortcut
   root-level SEEN and DELETED arguments and to raise *seq1.
   Returns -1 on error, 0 if nothing can match, otherwise whether the
   range is still non-empty.
   NOTE(review): the only caller visible here checks just for < 0, so a
   0 return doesn't currently shorten the search. */
static int search_limit_by_flags(struct index_search_context *ctx,
				 const struct mail_index_header *hdr,
				 struct mail_search_arg *args,
				 uint32_t *seq1, uint32_t *seq2)
{
	for (; args != NULL; args = args->next) {
		if (args->type == SEARCH_SEEN) {
			/* SEEN with 0 seen? */
			if (!args->not && hdr->seen_messages_count == 0)
				return 0;

			if (hdr->seen_messages_count == hdr->messages_count) {
				/* UNSEEN with all seen? */
				if (args->not)
					return 0;

				/* SEEN with all seen */
				args->match_always = TRUE;
			} else if (args->not) {
				/* UNSEEN with lowwater limiting */
				if (search_limit_lowwater(ctx,
						hdr->first_unseen_uid_lowwater,
						seq1) < 0)
					return -1;
			}
		}

		if (args->type == SEARCH_DELETED) {
			/* DELETED with 0 deleted? */
			if (!args->not && hdr->deleted_messages_count == 0)
				return 0;

			if (hdr->deleted_messages_count ==
			    hdr->messages_count) {
				/* UNDELETED with all deleted? */
				if (args->not)
					return 0;

				/* DELETED with all deleted */
				args->match_always = TRUE;
			} else if (!args->not) {
				/* DELETED with lowwater limiting */
				if (search_limit_lowwater(ctx,
						hdr->first_deleted_uid_lowwater,
						seq1) < 0)
					return -1;
			}
		}
	}

	return *seq1 <= *seq2;
}

/* Computes ctx->seq1..ctx->seq2 for the search. An empty mailbox gives
   the empty range 1..0. Returns -1 on error. */
static int search_get_seqset(struct index_search_context *ctx,
			     struct mail_search_arg *args)
{
	const struct mail_index_header *hdr;

	if (mail_index_get_header(ctx->view, &hdr) < 0) {
		mail_storage_set_index_error(ctx->ibox);
		return -1;
	}

	if (hdr->messages_count == 0) {
		ctx->seq1 = 1;
		ctx->seq2 = 0;
		return 0;
	}

	if (search_parse_msgset_args(ctx->ibox, hdr, args,
				     &ctx->seq1, &ctx->seq2) < 0)
		return -1;

	if (ctx->seq1 == 0) {
		/* nothing limited the range, go through everything */
		ctx->seq1 = 1;
		ctx->seq2 = hdr->messages_count;
	}

	i_assert(ctx->seq1 <= ctx->seq2);

	/* UNSEEN and DELETED in root search level may limit the range */
	if (search_limit_by_flags(ctx, hdr, args, &ctx->seq1, &ctx->seq2) < 0)
		return -1;
	return 0;
}

/* Stores the supported sort program in *sort_program; always just
   MAIL_SORT_END. Returns 0. */
int index_storage_search_get_sorting(struct mailbox *box __attr_unused__,
				     enum mail_sort_type *sort_program)
{
	/* currently we don't support sorting */
	*sort_program = MAIL_SORT_END;
	return 0;
}

/* Begins a search. The returned context must be released with
   index_storage_search_deinit(). If the sequence range can't be
   computed, the context is marked failed and given an empty range.
   A non-empty sort_program is a fatal error. */
struct mail_search_context *
index_storage_search_init(struct mailbox_transaction_context *_t,
			  const char *charset, struct mail_search_arg *args,
			  const enum mail_sort_type *sort_program,
			  enum mail_fetch_field wanted_fields,
			  struct mailbox_header_lookup_ctx *wanted_headers)
{
	struct index_transaction_context *t =
		(struct index_transaction_context *)_t;
	struct index_search_context *ctx;

	if (sort_program != NULL && *sort_program != MAIL_SORT_END) {
		i_fatal("BUG: index_storage_search_init(): "
			"invalid sort_program");
	}

	ctx = i_new(struct index_search_context, 1);
	ctx->mail_ctx.box = &t->ibox->box;
	ctx->ibox = t->ibox;
	ctx->view = t->trans_view;
	ctx->charset = i_strdup(charset);
	ctx->args = args;

	ctx->mail = &ctx->imail.mail;
	index_mail_init(t, &ctx->imail, wanted_fields, wanted_headers);

	mail_search_args_reset(ctx->args, TRUE);

	if (search_get_seqset(ctx, args) < 0) {
		ctx->failed = TRUE;
		ctx->seq1 = 1;
		ctx->seq2 = 0;
	}
	return &ctx->mail_ctx;
}

/* Ends a search and frees the context. A pending error message is
   copied to the storage. Returns -1 if the search failed or an error
   was set, otherwise 0. */
int index_storage_search_deinit(struct mail_search_context *_ctx)
{
	struct index_search_context *ctx = (struct index_search_context *)_ctx;
	int ret;

	ret = ctx->failed || ctx->error != NULL ? -1 : 0;

	if (ctx->imail.pool != NULL)
		index_mail_deinit(&ctx->imail);

	if (ctx->error != NULL) {
		mail_storage_set_error(ctx->ibox->box.storage,
				       "%s", ctx->error);
	}

	if (ctx->hdr_pool != NULL)
		pool_unref(ctx->hdr_pool);

	/* allocated with i_strdup() in index_storage_search_init() */
	i_free(ctx->charset);
	i_free(ctx);
	return ret;
}

/* Matches the current message in three passes: index record, the
   date/size getters, then the message text. The first two passes return
   as soon as mail_search_args_foreach() reports a definite (>= 0) result. */
static int search_match_next(struct index_search_context *ctx)
{
	struct mail_search_arg *arg;
	int ret;

	/* check the index matches first */
	mail_search_args_reset(ctx->args, FALSE);
	ret = mail_search_args_foreach(ctx->args, search_index_arg, ctx);
	if (ret >= 0)
		return ret > 0;

	if (ctx->imail.data.rec == NULL) {
		/* expunged message, no way to check if the rest would have
		   matched */
		return FALSE;
	}

	/* next search only from cached arguments */
	ret = mail_search_args_foreach(ctx->args, search_cached_arg, ctx);
	if (ret >= 0)
		return ret > 0;

	/* open the mail file and check the rest */
	if (!search_arg_match_text(ctx->args, ctx))
		return FALSE;

	for (arg = ctx->args; arg != NULL; arg = arg->next) {
		if (arg->result != 1)
			return FALSE;
	}

	return TRUE;
}

/* Returns the next matching mail, or NULL when the range is exhausted
   or an error occurred (ctx->failed or ctx->error is then set). */
struct mail *index_storage_search_next(struct mail_search_context *_ctx)
{
	struct index_search_context *ctx = (struct index_search_context *)_ctx;
	int ret;

	ret = 0;
	while (ctx->seq1 <= ctx->seq2) {
		if (index_mail_next(&ctx->imail, ctx->seq1++) < 0) {
			ctx->failed = TRUE;
			return NULL;
		}

		t_push();
		ret = search_match_next(ctx);
		t_pop();

		if (ctx->error != NULL)
			ret = -1;
		if (ret != 0)
			break;
	}

	if (ret <= 0) {
		/* error or last record */
		return NULL;
	}

	return ctx->mail;
}