# HG changeset patch # User Timo Sirainen # Date 1090117506 -10800 # Node ID 7d02e2a7672db42b93c7e1c828c03a37bf9e7f6f # Parent 0a8c9bfc05a1603df869ffdfcd3b278e37af7cc4 Header caching redesigned. New design allows caching decisions per field, so they can be divided to temporary/permanent. Cached headers are now always returned in original order, old code didn't guarantee it. Some other caching changes. (still missing code to store changes in caching decisions) diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/imap/imap-fetch-body-section.c --- a/src/imap/imap-fetch-body-section.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/imap/imap-fetch-body-section.c Sun Jul 18 05:25:06 2004 +0300 @@ -254,7 +254,7 @@ if (ctx->dest != NULL) buffer_append(ctx->dest, str, size); - if (ctx->output != NULL) { + else { if (o_stream_send(ctx->output, str, size) < 0) return FALSE; } @@ -273,7 +273,9 @@ ctx->match_func = header_match; if (ctx->fetch_ctx->body_fetch_from_cache) { - input = ctx->mail->get_headers(ctx->mail, ctx->fields); + input = ctx->mail-> + get_headers(ctx->mail, + ctx->fetch_ctx->headers_ctx); if (input == NULL) return FALSE; } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/imap/imap-fetch.c --- a/src/imap/imap-fetch.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/imap/imap-fetch.c Sun Jul 18 05:25:06 2004 +0300 @@ -347,7 +347,7 @@ struct mail *mail; struct imap_fetch_body_data *body; const char *null = NULL; - const char *const *wanted_headers, *const *arr; + const char *const *arr; buffer_t *buffer; memset(&ctx, 0, sizeof(ctx)); @@ -375,27 +375,33 @@ /* If we have only BODY[HEADER.FIELDS (...)] fetches, get them separately rather than parsing the full header so mail storage can try to cache them. 
*/ - ctx.body_fetch_from_cache = TRUE; - buffer = buffer_create_dynamic(pool_datastack_create(), 64, (size_t)-1); - for (body = bodies; body != NULL; body = body->next) { - if (strncmp(body->section, "HEADER.FIELDS ", 14) != 0) { - ctx.body_fetch_from_cache = FALSE; - break; - } + ctx.body_fetch_from_cache = (imap_data & (IMAP_FETCH_RFC822 | + IMAP_FETCH_RFC822_HEADER | + IMAP_FETCH_RFC822_TEXT)) == 0; + if (ctx.body_fetch_from_cache) { + buffer = buffer_create_dynamic(pool_datastack_create(), + 64, (size_t)-1); + for (body = bodies; body != NULL; body = body->next) { + if (strncmp(body->section, "HEADER.FIELDS ", 14) != 0) { + ctx.body_fetch_from_cache = FALSE; + break; + } - arr = imap_fetch_get_body_fields(body->section + 14); - while (*arr != NULL) { - buffer_append(buffer, arr, sizeof(*arr)); - arr++; + arr = imap_fetch_get_body_fields(body->section + 14); + while (*arr != NULL) { + buffer_append(buffer, arr, sizeof(*arr)); + arr++; + } } + buffer_append(buffer, &null, sizeof(null)); + ctx.headers_ctx = !ctx.body_fetch_from_cache ? NULL : + mailbox_header_lookup_init(box, buffer_get_data(buffer, + NULL)); } - buffer_append(buffer, &null, sizeof(null)); - wanted_headers = !ctx.body_fetch_from_cache ? 
NULL : - buffer_get_data(buffer, NULL); t = mailbox_transaction_begin(box, TRUE); ctx.search_ctx = mailbox_search_init(t, NULL, search_args, NULL, - fetch_data, wanted_headers); + fetch_data, ctx.headers_ctx); if (ctx.search_ctx == NULL) ctx.failed = TRUE; else { @@ -411,6 +417,8 @@ if (mailbox_search_deinit(ctx.search_ctx) < 0) ctx.failed = TRUE; } + if (ctx.headers_ctx != NULL) + mailbox_header_lookup_deinit(ctx.headers_ctx); if (ctx.failed) mailbox_transaction_rollback(t); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/imap/imap-fetch.h --- a/src/imap/imap-fetch.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/imap/imap-fetch.h Sun Jul 18 05:25:06 2004 +0300 @@ -24,6 +24,7 @@ enum mail_fetch_field fetch_data; enum imap_fetch_field imap_data; struct imap_fetch_body_data *bodies; + struct mailbox_header_lookup_ctx *headers_ctx; string_t *str; struct ostream *output; diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/imap/imap-sort.c --- a/src/imap/imap-sort.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/imap/imap-sort.c Sun Jul 18 05:25:06 2004 +0300 @@ -194,6 +194,7 @@ enum mail_sort_type norm_prog[MAX_SORT_PROGRAM_SIZE]; enum mail_fetch_field wanted_fields; const char *wanted_headers[MAX_WANTED_HEADERS]; + struct mailbox_header_lookup_ctx *headers_ctx; struct sort_context *ctx; struct mail *mail; buffer_t *buf; @@ -222,14 +223,17 @@ memset(wanted_headers, 0, sizeof(wanted_headers)); wanted_fields = init_sort_elements(ctx, wanted_headers); + headers_ctx = mailbox_header_lookup_init(client->mailbox, + wanted_headers); /* initialize searching */ ctx->t = mailbox_transaction_begin(client->mailbox, FALSE); ctx->search_ctx = mailbox_search_init(ctx->t, charset, args, norm_prog, - wanted_fields, wanted_headers); + wanted_fields, headers_ctx); if (ctx->search_ctx == NULL) { mailbox_transaction_rollback(ctx->t); + mailbox_header_lookup_deinit(headers_ctx); return -1; } @@ -259,6 +263,7 @@ str_len(ctx->str)); } + mailbox_header_lookup_deinit(headers_ctx); mail_sort_deinit(ctx); return ret; } diff 
-r 0a8c9bfc05a1 -r 7d02e2a7672d src/imap/imap-thread.c --- a/src/imap/imap-thread.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/imap/imap-thread.c Sun Jul 18 05:25:06 2004 +0300 @@ -107,6 +107,7 @@ "message-id", "in-reply-to", "references", "subject", NULL }; + struct mailbox_header_lookup_ctx *headers_ctx; struct thread_context *ctx; struct mail *mail; int ret; @@ -115,14 +116,17 @@ i_fatal("Only REFERENCES threading supported"); ctx = t_new(struct thread_context, 1); + headers_ctx = mailbox_header_lookup_init(client->mailbox, + wanted_headers); /* initialize searching */ ctx->t = mailbox_transaction_begin(client->mailbox, FALSE); ctx->search_ctx = mailbox_search_init(ctx->t, charset, args, NULL, - MAIL_FETCH_DATE, wanted_headers); + MAIL_FETCH_DATE, headers_ctx); if (ctx->search_ctx == NULL) { mailbox_transaction_rollback(ctx->t); + mailbox_header_lookup_deinit(headers_ctx); return -1; } @@ -149,6 +153,8 @@ ret = mailbox_search_deinit(ctx->search_ctx); if (mailbox_transaction_commit(ctx->t) < 0) ret = -1; + + mailbox_header_lookup_deinit(headers_ctx); mail_thread_deinit(ctx); return ret; } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-imap/imap-bodystructure.c --- a/src/lib-imap/imap-bodystructure.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-imap/imap-bodystructure.c Sun Jul 18 05:25:06 2004 +0300 @@ -350,24 +350,27 @@ string_t *str, int extended) { struct message_part_body_data *data = part->context; + int text; if (data == NULL) { /* there was no content headers, use an empty structure */ data = t_new(struct message_part_body_data, 1); } - if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) + if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) { str_append(str, "\"message\" \"rfc822\""); - else { + text = FALSE; + } else { /* "content type" "subtype" */ + text = data->content_type == NULL || + strcasecmp(data->content_type, "\"text\"") == 0; str_append(str, NVL(data->content_type, "\"text\"")); str_append_c(str, ' '); if (data->content_subtype != NULL) 
str_append(str, data->content_subtype); else { - if (data->content_type == NULL || - strcasecmp(data->content_type, "\"text\"") == 0) + if (text) str_append(str, "\"plain\""); else str_append(str, "\"unknown\""); @@ -378,8 +381,7 @@ /* ("content type param key" "value" ...) */ str_append_c(str, ' '); if (data->content_type_params == NULL) { - if (data->content_type != NULL && - strncasecmp(data->content_type, "\"text\"", 6) != 0) + if (!text) str_append(str, "NIL"); else str_append(str, "("DEFAULT_CHARSET")"); @@ -395,7 +397,7 @@ NVL(data->content_transfer_encoding, "\"7bit\""), part->body_size.virtual_size); - if (part->flags & MESSAGE_PART_FLAG_TEXT) { + if (text) { /* text/.. contains line count */ str_printfa(str, " %u", part->body_size.lines); } else if (part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) { diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/Makefile.am --- a/src/lib-index/Makefile.am Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/Makefile.am Sun Jul 18 05:25:06 2004 +0300 @@ -8,6 +8,7 @@ mail-cache.c \ mail-cache-compress.c \ mail-cache-decisions.c \ + mail-cache-fields.c \ mail-cache-lookup.c \ mail-cache-transaction.c \ mail-index.c \ diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-compress.c --- a/src/lib-index/mail-cache-compress.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache-compress.c Sun Jul 18 05:25:06 2004 +0300 @@ -10,49 +10,40 @@ struct mail_cache_copy_context { int new_msg; - char field_seen[32], keep_fields[32], temp_fields[32]; - buffer_t *buffer, *header; + buffer_t *buffer, *field_seen; + uint8_t field_seen_value; }; static int -mail_cache_compress_callback(struct mail_cache_view *view __attr_unused__, - enum mail_cache_field field, +mail_cache_compress_callback(struct mail_cache_view *view, uint32_t file_field, const void *data, size_t data_size, void *context) { - struct mail_cache_copy_context *ctx = context; + struct mail_cache_copy_context *ctx = context; + enum mail_cache_decision_type dec; 
+ unsigned int field; + uint8_t *field_seen; uint32_t size32; - int i; + field_seen = buffer_get_space_unsafe(ctx->field_seen, file_field, 1); + if (*field_seen == ctx->field_seen_value) { + /* duplicate */ + return 1; + } + *field_seen = ctx->field_seen_value; + + field = view->cache->file_field_map[file_field]; + dec = view->cache->fields[field].decision & ~MAIL_CACHE_DECISION_FORCED; if (ctx->new_msg) { - if (!ctx->temp_fields[field]) + if (dec == MAIL_CACHE_DECISION_NO) return 1; } else { - if (!ctx->keep_fields[field]) + if (dec != MAIL_CACHE_DECISION_YES) return 1; } - if (ctx->field_seen[field]) { - /* drop duplicates */ - return 1; - } - ctx->field_seen[field] = TRUE; + buffer_append(ctx->buffer, &file_field, sizeof(file_field)); - for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) { - if (mail_cache_header_fields[i] == field) { - /* it's header - save it into header field */ - size32 = buffer_get_used_size(ctx->header); - if (size32 > 0) { - /* remove old terminating \0 */ - buffer_set_used_size(ctx->header, size32-1); - } - buffer_append(ctx->header, data, data_size); - return 1; - } - } - - buffer_append(ctx->buffer, &field, sizeof(field)); - - if (mail_cache_field_sizes[field] == (unsigned int)-1) { + if (view->cache->fields[field].field_size == (unsigned int)-1) { size32 = (uint32_t)data_size; buffer_append(ctx->buffer, &size32, sizeof(size32)); } @@ -72,12 +63,11 @@ const struct mail_index_header *idx_hdr; struct mail_cache_header hdr; struct mail_cache_record cache_rec; - enum mail_cache_field field; struct ostream *output; - const char *str; - uint32_t size32, message_count, seq, first_new_seq, old_offset; + buffer_t *buffer; + size_t size; + uint32_t message_count, seq, first_new_seq, old_offset; uoff_t offset; - int i, ret, header_idx; /* get sequence of first message which doesn't need it's temp fields removed. 
*/ @@ -104,85 +94,44 @@ hdr.indexid = idx_hdr->indexid; hdr.file_seq = idx_hdr->cache_file_seq + 1; - if (cache->hdr != NULL) { - memcpy(hdr.field_usage_decision_type, - cache->hdr->field_usage_decision_type, - sizeof(hdr.field_usage_decision_type)); - memcpy(hdr.field_usage_last_used, - cache->hdr->field_usage_last_used, - sizeof(hdr.field_usage_last_used)); - } else { - memcpy(hdr.field_usage_decision_type, - cache->default_field_usage_decision_type, - sizeof(hdr.field_usage_decision_type)); + if (cache->fields_count != 0) { + hdr.field_header_offset = + mail_cache_uint32_to_offset(sizeof(hdr)); + } + o_stream_send(output, &hdr, sizeof(hdr)); + + if (cache->fields_count != 0) { + t_push(); + buffer = buffer_create_dynamic(pool_datastack_create(), + 256, (size_t)-1); + mail_cache_header_fields_get(cache, buffer); + o_stream_send(output, buffer_get_data(buffer, NULL), + buffer_get_used_size(buffer)); + t_pop(); } memset(&ctx, 0, sizeof(ctx)); ctx.buffer = buffer_create_dynamic(default_pool, 4096, (size_t)-1); - ctx.header = buffer_create_dynamic(default_pool, 4096, (size_t)-1); - - for (i = 0; i < 32; i++) { - if (hdr.field_usage_decision_type[i] & MAIL_CACHE_DECISION_YES) - ctx.keep_fields[i] = TRUE; - else if (hdr.field_usage_decision_type[i] & - MAIL_CACHE_DECISION_TEMP) { - ctx.temp_fields[i] = TRUE; - ctx.keep_fields[i] = TRUE; - } - } - - o_stream_send(output, &hdr, sizeof(hdr)); - - /* merge all the header pieces into one. if some message doesn't have - all the required pieces, we'll just have to drop them all. 
*/ - for (i = MAIL_CACHE_HEADERS_COUNT-1; i >= 0; i--) { - str = mail_cache_get_header_fields_str(cache, i); - if (str != NULL) - break; - } - - if (str == NULL) - header_idx = -1; - else { - hdr.header_offsets[0] = - mail_cache_uint32_to_offset(output->offset); - header_idx = i; - - size32 = strlen(str) + 1; - o_stream_send(output, &size32, sizeof(size32)); - o_stream_send(output, str, size32); - if ((size32 & 3) != 0) - o_stream_send(output, null4, 4 - (size32 & 3)); - } + ctx.field_seen = buffer_create_dynamic(default_pool, 64, (size_t)-1); + ctx.field_seen_value = 0; mail_index_reset_cache(t, hdr.file_seq); - ret = 0; for (seq = 1; seq <= message_count; seq++) { ctx.new_msg = seq >= first_new_seq; buffer_set_used_size(ctx.buffer, 0); - buffer_set_used_size(ctx.header, 0); - memset(ctx.field_seen, 0, sizeof(ctx.field_seen)); + + if (++ctx.field_seen_value == 0) { + memset(buffer_get_modifyable_data(ctx.field_seen, NULL), + 0, buffer_get_size(ctx.field_seen)); + ctx.field_seen_value++; + } memset(&cache_rec, 0, sizeof(cache_rec)); buffer_append(ctx.buffer, &cache_rec, sizeof(cache_rec)); - mail_cache_foreach(cache_view, seq, - mail_cache_compress_callback, &ctx); - - size32 = buffer_get_used_size(ctx.header); - if (size32 > 0 && ctx.field_seen[header_idx]) { - field = MAIL_CACHE_HEADERS1; - buffer_append(ctx.buffer, &field, sizeof(field)); - buffer_append(ctx.buffer, &size32, sizeof(size32)); - buffer_append(ctx.buffer, - buffer_get_data(ctx.header, NULL), - size32); - if ((size32 & 3) != 0) { - buffer_append(ctx.buffer, null4, - 4 - (size32 & 3)); - } - } + (void)mail_cache_foreach(cache_view, seq, + mail_cache_compress_callback, &ctx); cache_rec.size = buffer_get_used_size(ctx.buffer); if (cache_rec.size == sizeof(cache_rec)) @@ -197,7 +146,6 @@ } hdr.used_file_size = output->offset; buffer_free(ctx.buffer); - buffer_free(ctx.header); o_stream_seek(output, 0); o_stream_send(output, &hdr, sizeof(hdr)); @@ -236,6 +184,12 @@ return -1; locked = ret > 0; + /* get 
the latest info on fields */ + if (mail_cache_header_fields_read(cache) < 0) { + if (locked) mail_cache_unlock(cache); + return -1; + } + #ifdef DEBUG i_warning("Compressing cache file %s", cache->filepath); #endif @@ -246,6 +200,7 @@ MAIL_CACHE_LOCK_IMMEDIATE_TIMEOUT, NULL, NULL); if (fd == -1) { mail_cache_set_syscall_error(cache, "file_dotlock_open()"); + if (locked) mail_cache_unlock(cache); return -1; } @@ -268,13 +223,11 @@ if (mail_cache_map(cache, 0, 0) < 0) ret = -1; + else if (mail_cache_header_fields_read(cache) < 0) + ret = -1; } } - /* headers could have changed, reread them */ - memset(cache->split_offsets, 0, sizeof(cache->split_offsets)); - memset(cache->split_headers, 0, sizeof(cache->split_headers)); - if (locked) mail_cache_unlock(cache); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-decisions.c --- a/src/lib-index/mail-cache-decisions.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache-decisions.c Sun Jul 18 05:25:06 2004 +0300 @@ -67,39 +67,21 @@ */ #include "lib.h" -#include "write-full.h" +#include "ioloop.h" #include "mail-cache-private.h" -#include - -static void -mail_cache_set_decision_type(struct mail_cache *cache, - enum mail_cache_field field, - enum mail_cache_decision_type type) +void mail_cache_decision_lookup(struct mail_cache_view *view, uint32_t seq, + unsigned int field) { - uint8_t value = type; - - /* update the header without locking, we'll just write one byte and - it's very unlikely someone else tries to write different value for - it at the same time. even then it's just a wrong decision which - will be corrected sometimes later, not too bad.. 
*/ - if (pwrite_full(cache->fd, &value, 1, - offsetof(struct mail_cache_header, - field_usage_decision_type) + field) < 0) { - mail_cache_set_syscall_error(cache, "pwrite_full()"); - } -} - -void mail_cache_decision_lookup(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field) -{ + struct mail_cache *cache = view->cache; const struct mail_index_header *hdr; uint32_t uid; - if (view->cache->hdr->field_usage_decision_type[field] != - MAIL_CACHE_DECISION_TEMP) { + i_assert(field < cache->fields_count); + + if (cache->fields[field].decision != MAIL_CACHE_DECISION_TEMP) { /* a) forced decision - b) not cached, mail_cache_mark_missing() will handle this + b) not cached, mail_cache_decision_add() will handle this c) permanently cached already, okay. */ return; } @@ -109,7 +91,13 @@ mail_index_get_header(view->view, &hdr) < 0) return; - if (uid < view->cache->field_usage_uid_highwater[field] || + if (ioloop_time - cache->fields[field].last_used > 3600*24) { + /* update last_used about once a day */ + cache->fields[field].last_used = ioloop_time; + cache->field_header_write_pending = TRUE; + } + + if (uid < cache->fields[field].uid_highwater || uid < hdr->day_first_uid[7]) { /* a) nonordered access within this session. if client doesn't request messages in growing order, we assume it doesn't @@ -118,32 +106,34 @@ client with no local cache. if it was just a new client generating the local cache for the first time, we'll drop back to TEMP within few months. 
*/ - mail_cache_set_decision_type(view->cache, field, - MAIL_CACHE_DECISION_YES); + cache->fields[field].decision = MAIL_CACHE_DECISION_YES; + cache->field_header_write_pending = TRUE; } else { - view->cache->field_usage_uid_highwater[field] = uid; + cache->fields[field].uid_highwater = uid; } } void mail_cache_decision_add(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field) + unsigned int field) { + struct mail_cache *cache = view->cache; uint32_t uid; - if (MAIL_CACHE_IS_UNUSABLE(view->cache)) + i_assert(field < cache->fields_count); + + if (MAIL_CACHE_IS_UNUSABLE(cache)) return; - if (view->cache->hdr->field_usage_decision_type[field] != - MAIL_CACHE_DECISION_NO) { + if (cache->fields[field].decision != MAIL_CACHE_DECISION_NO) { /* a) forced decision b) we're already caching it, so it just wasn't in cache */ return; } /* field used the first time */ - mail_cache_set_decision_type(view->cache, field, - MAIL_CACHE_DECISION_TEMP); + cache->fields[field].decision = MAIL_CACHE_DECISION_TEMP; + cache->field_header_write_pending = TRUE; if (mail_index_lookup_uid(view->view, seq, &uid) == 0) - view->cache->field_usage_uid_highwater[field] = uid; + cache->fields[field].uid_highwater = uid; } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-fields.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-index/mail-cache-fields.c Sun Jul 18 05:25:06 2004 +0300 @@ -0,0 +1,276 @@ +/* Copyright (C) 2004 Timo Sirainen */ + +#include "lib.h" +#include "buffer.h" +#include "hash.h" +#include "mail-cache-private.h" + +#include + +#define CACHE_HDR_PREFETCH 1024 + +static const unsigned char *null4[] = { 0, 0, 0, 0 }; + +void mail_cache_register_fields(struct mail_cache *cache, + struct mail_cache_field *fields, + size_t fields_count) +{ + void *orig_key, *orig_value; + unsigned int new_idx; + size_t i; + + new_idx = cache->fields_count; + for (i = 0; i < fields_count; i++) { + if (hash_lookup_full(cache->field_name_hash, fields[i].name, + 
&orig_key, &orig_value)) { + fields[i].idx = + POINTER_CAST_TO(orig_value, unsigned int); + continue; + } + + fields[i].idx = new_idx++; + } + + if (new_idx == cache->fields_count) + return; + + /* @UNSAFE */ + cache->fields = p_realloc(cache->field_pool, cache->fields, + cache->fields_count * sizeof(*cache->fields), + new_idx * sizeof(*cache->fields)); + cache->field_file_map = + p_realloc(cache->field_pool, cache->field_file_map, + cache->fields_count * sizeof(*cache->field_file_map), + new_idx * sizeof(*cache->field_file_map)); + + for (i = 0; i < fields_count; i++) { + unsigned int idx = fields[i].idx; + + if (idx < cache->fields_count) + continue; + + /* new index - save it */ + cache->fields[idx] = fields[i]; + cache->fields[idx].name = + p_strdup(cache->field_pool, fields[i].name); + cache->field_file_map[idx] = (uint32_t)-1; + + switch (cache->fields[idx].type) { + case MAIL_CACHE_FIELD_FIXED_SIZE: + case MAIL_CACHE_FIELD_BITMASK: + break; + case MAIL_CACHE_FIELD_VARIABLE_SIZE: + case MAIL_CACHE_FIELD_STRING: + case MAIL_CACHE_FIELD_HEADER: + cache->fields[idx].field_size = (unsigned int)-1; + break; + } + + hash_insert(cache->field_name_hash, + (char *)cache->fields[idx].name, + POINTER_CAST(idx)); + } + cache->fields_count = new_idx; +} + +unsigned int +mail_cache_register_lookup(struct mail_cache *cache, const char *name) +{ + void *orig_key, *orig_value; + + if (hash_lookup_full(cache->field_name_hash, name, + &orig_key, &orig_value)) + return POINTER_CAST_TO(orig_value, unsigned int); + else + return (unsigned int)-1; +} + +static int mail_cache_header_fields_get_offset(struct mail_cache *cache, + uint32_t *offset_r) +{ + const struct mail_cache_header_fields *field_hdr; + uint32_t offset, next_offset; + + if (MAIL_CACHE_IS_UNUSABLE(cache)) { + *offset_r = 0; + return 0; + } + + /* find the latest header */ + offset = 0; + next_offset = + mail_cache_offset_to_uint32(cache->hdr->field_header_offset); + while (next_offset != 0) { + offset = next_offset; 
+ + if (mail_cache_map(cache, offset, + sizeof(*field_hdr) + CACHE_HDR_PREFETCH) < 0) + return -1; + + field_hdr = CONST_PTR_OFFSET(cache->mmap_base, offset); + next_offset = + mail_cache_offset_to_uint32(field_hdr->next_offset); + } + + *offset_r = offset; + return 0; +} + +int mail_cache_header_fields_read(struct mail_cache *cache) +{ + const struct mail_cache_header_fields *field_hdr = NULL; + struct mail_cache_field field; + const uint32_t *last_used, *sizes; + const uint8_t *types, *decisions; + const char *p, *names, *end; + uint32_t offset, i; + + if (mail_cache_header_fields_get_offset(cache, &offset) < 0) + return -1; + + if (offset == 0) { + /* no fields - the file is empty */ + return 0; + } + + field_hdr = CONST_PTR_OFFSET(cache->mmap_base, offset); + if (offset + field_hdr->size > cache->mmap_length) { + mail_cache_set_corrupted(cache, + "field header points outside file"); + return -1; + } + + /* check the fixed size of the header. name[] has to be checked + separately */ + if (field_hdr->size < sizeof(*field_hdr) + + field_hdr->fields_count * (sizeof(uint32_t)*2 + 1 + 2)) { + mail_cache_set_corrupted(cache, "invalid field header size"); + return -1; + } + + if (field_hdr->size > sizeof(*field_hdr) + CACHE_HDR_PREFETCH) { + if (mail_cache_map(cache, offset, field_hdr->size) < 0) + return -1; + } + field_hdr = CONST_PTR_OFFSET(cache->mmap_base, offset); + + cache->file_field_map = + i_realloc(cache->file_field_map, + cache->file_fields_count * sizeof(unsigned int), + field_hdr->fields_count * sizeof(unsigned int)); + cache->file_fields_count = field_hdr->fields_count; + + last_used = MAIL_CACHE_FIELD_LAST_USED(field_hdr); + sizes = MAIL_CACHE_FIELD_SIZE(field_hdr); + types = MAIL_CACHE_FIELD_TYPE(field_hdr); + decisions = MAIL_CACHE_FIELD_DECISION(field_hdr); + names = MAIL_CACHE_FIELD_NAMES(field_hdr); + end = CONST_PTR_OFFSET(field_hdr, field_hdr->size); + + /* clear the old mapping */ + for (i = 0; i < cache->fields_count; i++) + 
cache->field_file_map[i] = (uint32_t)-1; + + memset(&field, 0, sizeof(field)); + for (i = 0; i < field_hdr->fields_count; i++) { + for (p = names; p != end && *p != '\0'; p++) ; + if (p == end) { + mail_cache_set_corrupted(cache, + "field header names corrupted"); + return -1; + } + + field.name = names; + field.type = types[i]; + field.field_size = sizes[i]; + field.decision = decisions[i]; + field.last_used = (time_t)last_used[i]; + mail_cache_register_fields(cache, &field, 1); + cache->field_file_map[field.idx] = i; + cache->file_field_map[i] = field.idx; + + names = p + 1; + } + return 0; +} + +int mail_cache_header_fields_update(struct mail_cache *cache) +{ + int locked = cache->locked; + + if (!locked) { + if (mail_cache_lock(cache) <= 0) + return -1; + } + + // FIXME + + if (!locked) + mail_cache_unlock(cache); +} + +#define UGLY_COPY_MACRO(field_name, type) \ + for (i = 0; i < cache->file_fields_count; i++) { \ + field = cache->file_field_map[i]; \ + field_name = (type)cache->fields[field].field_name; \ + buffer_append(dest, &field_name, sizeof(field_name)); \ + } \ + for (i = 0; i < cache->fields_count; i++) { \ + if (cache->field_file_map[i] != (uint32_t)-1) \ + continue; \ + field_name = (type)cache->fields[i].field_name; \ + buffer_append(dest, &field_name, sizeof(field_name)); \ + } + +void mail_cache_header_fields_get(struct mail_cache *cache, buffer_t *dest) +{ + struct mail_cache_header_fields hdr; + unsigned int field; + const char *name; + uint32_t i, last_used, field_size; + uint8_t type, decision; + + memset(&hdr, 0, sizeof(hdr)); + hdr.fields_count = cache->fields_count; + buffer_append(dest, &hdr, sizeof(hdr)); + + /* we have to keep the field order for the existing fields. 
*/ + UGLY_COPY_MACRO(last_used, uint32_t); + UGLY_COPY_MACRO(field_size, uint32_t); + UGLY_COPY_MACRO(type, uint8_t); + UGLY_COPY_MACRO(decision, uint8_t); + + for (i = 0; i < cache->file_fields_count; i++) { + field = cache->file_field_map[i]; + name = cache->fields[field].name; + buffer_append(dest, name, strlen(name)+1); + } + for (i = 0; i < cache->fields_count; i++) { + if (cache->field_file_map[i] != (uint32_t)-1) + continue; + name = cache->fields[i].name; + buffer_append(dest, name, strlen(name)+1); + } + + hdr.size = buffer_get_used_size(dest); + buffer_write(dest, 0, &hdr, sizeof(hdr)); + + if ((hdr.size & 3) != 0) + buffer_append(dest, null4, 4 - (hdr.size & 3)); +} + +int mail_cache_header_fields_get_next_offset(struct mail_cache *cache, + uint32_t *offset_r) +{ + if (mail_cache_header_fields_get_offset(cache, offset_r) < 0) + return -1; + + if (*offset_r == 0) { + *offset_r = offsetof(struct mail_cache_header, + field_header_offset); + } else { + *offset_r += offsetof(struct mail_cache_header_fields, + next_offset); + } + return 0; +} diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-lookup.c --- a/src/lib-index/mail-cache-lookup.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache-lookup.c Sun Jul 18 05:25:06 2004 +0300 @@ -5,114 +5,9 @@ #include "str.h" #include "mail-cache-private.h" -#define CACHE_PREFETCH 1024 - -const char * -mail_cache_get_header_fields_str(struct mail_cache *cache, unsigned int idx) -{ - uint32_t offset, data_size; - const unsigned char *buf; - - if (MAIL_CACHE_IS_UNUSABLE(cache)) - return NULL; - - offset = mail_cache_offset_to_uint32(cache->hdr->header_offsets[idx]); - - if (offset == 0) - return NULL; - - if (mail_cache_map(cache, offset, CACHE_PREFETCH) < 0) - return NULL; - - if (offset + sizeof(data_size) > cache->mmap_length) { - mail_cache_set_corrupted(cache, "Header %u points outside file", - idx); - return NULL; - } - - buf = cache->mmap_base; - memcpy(&data_size, buf + offset, 
sizeof(data_size)); - offset += sizeof(data_size); - - if (data_size == 0) { - mail_cache_set_corrupted(cache, - "Header %u points to empty string", idx); - return NULL; - } - - if (data_size + sizeof(data_size) > CACHE_PREFETCH) { - if (mail_cache_map(cache, offset, data_size) < 0) - return NULL; - } - - if (offset + data_size > cache->mmap_length) { - mail_cache_set_corrupted(cache, "Header %u points outside file", - idx); - return NULL; - } - - buf = cache->mmap_base; - if (buf[offset + data_size - 1] != '\0') { - mail_cache_set_corrupted(cache, - "Header %u points to invalid string", idx); - return NULL; - } +#include - return buf + offset; -} - -const char *const * -mail_cache_split_header(struct mail_cache *cache, const char *header) -{ - const char *const *arr, *const *tmp; - const char *null = NULL; - char *str; - buffer_t *buf; - - if (header == NULL) - return NULL; - - arr = t_strsplit(header, "\n"); - buf = buffer_create_dynamic(cache->split_header_pool, 32, (size_t)-1); - for (tmp = arr; *tmp != NULL; tmp++) { - str = p_strdup(cache->split_header_pool, *tmp); - buffer_append(buf, &str, sizeof(str)); - } - buffer_append(buf, &null, sizeof(null)); - - return buffer_get_data(buf, NULL); -} - -const char *const *mail_cache_get_header_fields(struct mail_cache_view *view, - unsigned int idx) -{ - struct mail_cache *cache = view->cache; - const char *str; - int i; - - i_assert(idx < MAIL_CACHE_HEADERS_COUNT); - - if (MAIL_CACHE_IS_UNUSABLE(view->cache)) - return NULL; - - /* t_strsplit() is a bit slow, so we cache it */ - if (cache->hdr->header_offsets[idx] != cache->split_offsets[idx]) { - p_clear(cache->split_header_pool); - - t_push(); - for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) { - cache->split_offsets[i] = - cache->hdr->header_offsets[i]; - - str = mail_cache_get_header_fields_str(cache, i); - cache->split_headers[i] = - mail_cache_split_header(cache, str); - } - t_pop(); - } - - return cache->split_headers[idx]; -} +#define CACHE_PREFETCH 1024 
struct mail_cache_record * mail_cache_get_record(struct mail_cache *cache, uint32_t offset) @@ -177,23 +72,38 @@ const struct mail_cache_record *cache_rec, mail_cache_foreach_callback_t *callback, void *context) { + struct mail_cache *cache = view->cache; size_t pos, next_pos, max_size, data_size; - uint32_t field; + uint32_t file_field; + unsigned int field; int ret; max_size = cache_rec->size; if (max_size < sizeof(*cache_rec) + sizeof(uint32_t)*2) { - mail_cache_set_corrupted(view->cache, - "record has invalid size"); + mail_cache_set_corrupted(cache, "record has invalid size"); return -1; } max_size -= sizeof(uint32_t); for (pos = sizeof(*cache_rec); pos < max_size; ) { - field = *((const uint32_t *)CONST_PTR_OFFSET(cache_rec, pos)); + file_field = + *((const uint32_t *)CONST_PTR_OFFSET(cache_rec, pos)); pos += sizeof(uint32_t); - data_size = mail_cache_field_sizes[field]; + if (file_field >= cache->file_fields_count) { + /* new field, have to re-read fields header to figure + out its size */ + if (mail_cache_header_fields_read(cache) < 0) + return -1; + if (file_field >= cache->file_fields_count) { + mail_cache_set_corrupted(cache, + "field index too large"); + return -1; + } + } + + field = cache->file_field_map[file_field]; + data_size = cache->fields[field].field_size; if (data_size == (unsigned int)-1) { data_size = *((const uint32_t *) CONST_PTR_OFFSET(cache_rec, pos)); @@ -202,14 +112,15 @@ next_pos = pos + ((data_size + 3) & ~3); if (next_pos > cache_rec->size) { - mail_cache_set_corrupted(view->cache, + mail_cache_set_corrupted(cache, "Record continues outside it's allocated size"); return -1; } - ret = callback(view, field, CONST_PTR_OFFSET(cache_rec, pos), + ret = callback(view, file_field, + CONST_PTR_OFFSET(cache_rec, pos), data_size, context); - if (ret <= 0) + if (ret != 1) return ret; pos = next_pos; @@ -227,8 +138,15 @@ if (MAIL_CACHE_IS_UNUSABLE(view->cache)) return 0; - if ((ret = mail_cache_lookup_offset(view, seq, &offset)) <= 0) - return 
ret; + if (view->cached_offset_seq == seq) + offset = view->cached_offset; + else { + if ((ret = mail_cache_lookup_offset(view, seq, &offset)) <= 0) + return ret; + + view->cached_offset_seq = seq; + view->cached_offset = offset; + } cache_rec = mail_cache_get_record(view->cache, offset); while (cache_rec != NULL) { @@ -251,13 +169,14 @@ return 1; } -static int mail_cache_seq_callback(struct mail_cache_view *view, - enum mail_cache_field field, - const void *data __attr_unused__, - size_t data_size __attr_unused__, - void *context __attr_unused__) +static int +mail_cache_seq_callback(struct mail_cache_view *view, uint32_t file_field, + const void *data __attr_unused__, + size_t data_size __attr_unused__, + void *context __attr_unused__) { - view->cached_exists[field] = TRUE; + buffer_write(view->cached_exists_buf, file_field, + &view->cached_exists_value, 1); return 1; } @@ -265,53 +184,63 @@ { int ret; + if (++view->cached_exists_value == 0) { + /* wrapped, we'll have to clear the buffer */ + memset(buffer_get_modifyable_data(view->cached_exists_buf, + NULL), 0, + buffer_get_size(view->cached_exists_buf)); + view->cached_exists_value++; + } + view->cached_exists_seq = seq; - memset(view->cached_exists, 0, sizeof(view->cached_exists)); - ret = mail_cache_foreach(view, seq, mail_cache_seq_callback, NULL); return ret < 0 ? -1 : 0; } int mail_cache_field_exists(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field) + unsigned int field) { - i_assert(field < MAIL_CACHE_FIELD_COUNT); + const uint8_t *data; + uint32_t file_field; + size_t size; - if (MAIL_CACHE_IS_UNUSABLE(view->cache)) + i_assert(field < view->cache->fields_count); + + file_field = view->cache->field_file_map[field]; + if (file_field == (uint32_t)-1) return 0; if (view->cached_exists_seq != seq) { if (mail_cache_seq(view, seq) < 0) return -1; } - return view->cached_exists[field]; + + data = buffer_get_data(view->cached_exists_buf, &size); + return size <= file_field ? 
FALSE : + data[file_field] == view->cached_exists_value; } enum mail_cache_decision_type -mail_cache_field_get_decision(struct mail_cache *cache, - enum mail_cache_field field) +mail_cache_field_get_decision(struct mail_cache *cache, unsigned int field) { - i_assert(field < MAIL_CACHE_FIELD_COUNT); + i_assert(field < cache->fields_count); - if (MAIL_CACHE_IS_UNUSABLE(cache)) - return cache->default_field_usage_decision_type[field]; - - return cache->hdr->field_usage_decision_type[field]; + return cache->fields[field].decision; } struct mail_cache_lookup_context { buffer_t *dest_buf; - enum mail_cache_field field; + uint32_t file_field; }; static int mail_cache_lookup_callback(struct mail_cache_view *view __attr_unused__, - enum mail_cache_field field, - const void *data, size_t data_size, void *context) + uint32_t file_field, const void *data, + size_t data_size, void *context) { struct mail_cache_lookup_context *ctx = context; - if (ctx->field != field) + if (ctx->file_field != file_field) return 1; buffer_append(ctx->dest_buf, data, data_size); @@ -319,60 +248,174 @@ } int mail_cache_lookup_field(struct mail_cache_view *view, buffer_t *dest_buf, - uint32_t seq, enum mail_cache_field field) + uint32_t seq, unsigned int field) { - struct mail_cache_lookup_context ctx; + struct mail_cache_lookup_context ctx; + int ret; - i_assert(field < MAIL_CACHE_FIELD_COUNT); - - if (MAIL_CACHE_IS_UNUSABLE(view->cache)) - return 0; + if ((ret = mail_cache_field_exists(view, seq, field)) <= 0) + return ret; mail_cache_decision_lookup(view, seq, field); - if (view->cached_exists_seq != seq) { - if (mail_cache_seq(view, seq) < 0) - return -1; - } - - if (!view->cached_exists[field]) - return 0; - /* should exist. find it. 
*/ - ctx.field = field; + ctx.file_field = view->cache->field_file_map[field]; ctx.dest_buf = dest_buf; return mail_cache_foreach(view, seq, mail_cache_lookup_callback, &ctx) == 0; } -int mail_cache_lookup_string_field(struct mail_cache_view *view, string_t *dest, - uint32_t seq, enum mail_cache_field field) -{ - size_t old_size, new_size; +struct header_lookup_data_rec { + uint32_t offset; + uint32_t data_size; +}; - i_assert(field < MAIL_CACHE_FIELD_COUNT); +struct header_lookup_data { + uint32_t line_num; + struct header_lookup_data_rec *data; +}; - if (MAIL_CACHE_IS_UNUSABLE(view->cache)) - return 0; +struct header_lookup_context { + unsigned int *fields; + size_t fields_count; + buffer_t *data; + + unsigned int max_field; + uint8_t *fields_found; +}; - old_size = str_len(dest); - if (!mail_cache_lookup_field(view, dest, seq, field)) - return 0; +static int +headers_find_callback(struct mail_cache_view *view, uint32_t file_field, + const void *data, size_t data_size, void *context) +{ + struct header_lookup_context *ctx = context; + const uint32_t *lines = data; + struct header_lookup_data hdr_data; + struct header_lookup_data_rec *hdr_data_rec; + unsigned int i, lines_count; + + if (file_field > ctx->max_field || ctx->fields_found[file_field] != 1) { + /* a) don't want it, b) duplicate */ + return 1; + } + ctx->fields_found[file_field]++; - new_size = str_len(dest); - if (old_size == new_size || - str_data(dest)[new_size-1] != '\0') { - mail_cache_set_corrupted(view->cache, - "String field %x doesn't end with NUL", field); - return -1; + /* data = { line_nums[], 0, "headers" } */ + for (i = 0; data_size >= sizeof(uint32_t); i++) { + data_size -= sizeof(uint32_t); + if (lines[i] == 0) + break; } - str_truncate(dest, new_size-1); + lines_count = i; + + /* FIXME: this relies on mmap() too heavily */ + hdr_data_rec = t_new(struct header_lookup_data_rec, 1); + hdr_data_rec->offset = (const char *)&lines[lines_count+1] - + (const char *)view->cache->mmap_base; + 
hdr_data_rec->data_size = (uint32_t)data_size; + + for (i = 0; i < lines_count; i++) { + hdr_data.line_num = lines[i]; + hdr_data.data = hdr_data_rec; + buffer_append(ctx->data, &hdr_data, sizeof(hdr_data)); + } return 1; } -enum mail_cache_record_flag -mail_cache_get_record_flags(struct mail_cache_view *view, uint32_t seq) +static int header_lookup_data_cmp(const void *p1, const void *p2) +{ + const struct header_lookup_data *d1 = p1, *d2 = p2; + + return (int)d1->line_num - (int)d2->line_num; +} + +int mail_cache_lookup_headers(struct mail_cache_view *view, string_t *dest, + uint32_t seq, unsigned int fields[], + size_t fields_count) { - // FIXME: - return 0; + struct mail_cache *cache = view->cache; + struct header_lookup_context ctx; + struct header_lookup_data *data; + const unsigned char *p, *start, *end; + size_t i, size, hdr_size; + unsigned int field_idx; + int ret; + + if (fields_count == 0) + return 1; + + t_push(); + + /* @UNSAFE */ + memset(&ctx, 0, sizeof(ctx)); + ctx.fields = t_new(unsigned int, fields_count); + ctx.fields_count = fields_count; + + ctx.max_field = 1; + ctx.fields_found = t_buffer_get(ctx.max_field); + for (i = 0; i < fields_count; i++) { + i_assert(fields[i] < cache->fields_count); + field_idx = cache->field_file_map[fields[i]]; + if (field_idx == (unsigned int)-1) { + /* not cached at all */ + t_pop(); + return 0; + } + + if (field_idx > ctx.max_field) { + ctx.fields_found = t_buffer_reget(ctx.fields_found, + field_idx + 1); + memset(ctx.fields_found + ctx.max_field + 1, 0, + field_idx - ctx.max_field - 1); + ctx.max_field = field_idx; + } + ctx.fields_found[field_idx] = 1; + ctx.fields[i] = field_idx; + } + t_buffer_alloc(ctx.max_field + 1); + + ctx.data = buffer_create_dynamic(pool_datastack_create(), + 256, (size_t)-1); + + /* we need to return them in sorted order. 
create array: + { line number -> cache file offset } */ + ret = mail_cache_foreach(view, seq, headers_find_callback, &ctx); + if (ret <= 0) { + t_pop(); + return ret; + } + + /* check that all fields were found */ + for (i = 0; i < ctx.max_field; i++) { + if (ctx.fields_found[i] == 1) { + t_pop(); + return 0; + } + } + + data = buffer_get_modifyable_data(ctx.data, &size); + size /= sizeof(*data); + qsort(data, size, sizeof(*data), header_lookup_data_cmp); + + /* then start filling dest buffer from the headers */ + for (i = 0; i < size; i++) { + start = CONST_PTR_OFFSET(cache->mmap_base, + data[i].data->offset); + end = start + data[i].data->data_size; + + for (p = start; p != end; p++) { + if (*p == '\n' && + (p+1 == end || (p[1] != ' ' && p[1] != '\t'))) { + p++; + break; + } + } + hdr_size = (size_t)(p - start); + data[i].data->offset += hdr_size; + data[i].data->data_size += hdr_size; + buffer_append(dest, start, hdr_size); + } + + t_pop(); + return 1; } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-private.h --- a/src/lib-index/mail-cache-private.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache-private.h Sun Jul 18 05:25:06 2004 +0300 @@ -50,12 +50,44 @@ uint32_t used_file_size; uint32_t deleted_space; - uint32_t field_usage_last_used[32]; /* time_t */ - uint8_t field_usage_decision_type[32]; + uint32_t field_header_offset; +}; + +struct mail_cache_header_fields { + uint32_t next_offset; + uint32_t size; + uint32_t fields_count; - uint32_t header_offsets[MAIL_CACHE_HEADERS_COUNT]; +#if 0 + /* last time the field was accessed. not updated more often than + once a day. 
*/ + uint32_t last_used[fields_count]; + /* (uint32_t)-1 for variable sized fields */ + uint32_t size[fields_count]; + /* enum mail_cache_field_type */ + uint8_t type[fields_count]; + /* enum mail_cache_decision_type */ + uint8_t decision[fields_count]; + /* NUL-separated list of field names */ + char name[fields_count][]; +#endif }; +#define MAIL_CACHE_FIELD_LAST_USED(field_hdr) \ + CONST_PTR_OFFSET(field_hdr, sizeof(uint32_t) * 3) +#define MAIL_CACHE_FIELD_SIZE(field_hdr) \ + CONST_PTR_OFFSET(MAIL_CACHE_FIELD_LAST_USED(field_hdr), \ + sizeof(uint32_t) * (field_hdr)->fields_count) +#define MAIL_CACHE_FIELD_TYPE(field_hdr) \ + CONST_PTR_OFFSET(MAIL_CACHE_FIELD_SIZE(field_hdr), \ + sizeof(uint32_t) * (field_hdr)->fields_count) +#define MAIL_CACHE_FIELD_DECISION(field_hdr) \ + CONST_PTR_OFFSET(MAIL_CACHE_FIELD_TYPE(field_hdr), \ + sizeof(uint8_t) * (field_hdr)->fields_count) +#define MAIL_CACHE_FIELD_NAMES(field_hdr) \ + CONST_PTR_OFFSET(MAIL_CACHE_FIELD_DECISION(field_hdr), \ + sizeof(uint8_t) * (field_hdr)->fields_count) + struct mail_cache_record { uint32_t prev_offset; uint32_t size; /* full record size, including this header */ @@ -85,16 +117,19 @@ const struct mail_cache_header *hdr; struct mail_cache_header hdr_copy; - pool_t split_header_pool; - uint32_t split_offsets[MAIL_CACHE_HEADERS_COUNT]; - const char *const *split_headers[MAIL_CACHE_HEADERS_COUNT]; + pool_t field_pool; + struct mail_cache_field *fields; + uint32_t *field_file_map; + unsigned int fields_count; + struct hash_table *field_name_hash; /* name -> idx */ - uint8_t default_field_usage_decision_type[32]; - uint32_t field_usage_uid_highwater[32]; + unsigned int *file_field_map; + unsigned int file_fields_count; unsigned int locked:1; unsigned int need_compress:1; unsigned int hdr_modified:1; + unsigned int field_header_write_pending:1; }; struct mail_cache_view { @@ -104,18 +139,20 @@ struct mail_cache_transaction_ctx *transaction; uint32_t trans_seq1, trans_seq2; - char cached_exists[32]; + /* 
if cached_exists_buf[field] == cached_exists_value, it's cached. + this allows us to avoid constantly clearing the whole buffer. + it needs to be cleared only when cached_exists_value is wrapped. */ + buffer_t *cached_exists_buf; + uint8_t cached_exists_value; uint32_t cached_exists_seq; + uint32_t cached_offset, cached_offset_seq; }; typedef int mail_cache_foreach_callback_t(struct mail_cache_view *view, - enum mail_cache_field field, + uint32_t file_field, const void *data, size_t data_size, void *context); -extern unsigned int mail_cache_field_sizes[32]; -extern enum mail_cache_field mail_cache_header_fields[MAIL_CACHE_HEADERS_COUNT]; - uint32_t mail_cache_uint32_to_offset(uint32_t offset); uint32_t mail_cache_offset_to_uint32(uint32_t offset); @@ -124,10 +161,11 @@ int mail_cache_lock(struct mail_cache *cache); void mail_cache_unlock(struct mail_cache *cache); -const char * -mail_cache_get_header_fields_str(struct mail_cache *cache, unsigned int idx); -const char *const * -mail_cache_split_header(struct mail_cache *cache, const char *header); +int mail_cache_header_fields_read(struct mail_cache *cache); +int mail_cache_header_fields_update(struct mail_cache *cache); +void mail_cache_header_fields_get(struct mail_cache *cache, buffer_t *dest); +int mail_cache_header_fields_get_next_offset(struct mail_cache *cache, + uint32_t *offset_r); struct mail_cache_record * mail_cache_get_record(struct mail_cache *cache, uint32_t offset); @@ -152,9 +190,9 @@ int mail_cache_delete(struct mail_cache *cache, uint32_t offset); void mail_cache_decision_lookup(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field); + uint32_t field); void mail_cache_decision_add(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field); + uint32_t field); void mail_cache_set_syscall_error(struct mail_cache *cache, const char *function); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache-transaction.c --- a/src/lib-index/mail-cache-transaction.c Sun 
Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache-transaction.c Sun Jul 18 05:25:06 2004 +0300 @@ -16,9 +16,6 @@ struct mail_cache_view *view; struct mail_index_transaction *trans; - uint32_t update_header_offsets[MAIL_CACHE_HEADERS_COUNT]; - unsigned int next_unused_header_lowwater; - buffer_t *cache_data, *cache_data_seq; uint32_t prev_seq; size_t prev_pos; @@ -279,6 +276,9 @@ size_t size; int ret; + i_assert((min_size & 3) == 0); + i_assert((max_size & 3) == 0); + if (min_size > ctx->reserved_space) { if (!locked) { if (mail_cache_lock(ctx->cache) <= 0) @@ -302,6 +302,7 @@ ctx->reserved_space -= size; if (available_space_r != NULL) *available_space_r = size; + i_assert((size & 3) == 0); if (size == max_size && commit) { /* final commit - see if we can free the rest of the @@ -436,8 +437,7 @@ int mail_cache_transaction_commit(struct mail_cache_transaction_ctx *ctx) { struct mail_cache *cache = ctx->cache; - uint32_t offset; - int i, ret = 0; + int ret = 0; if (!ctx->changes) { mail_cache_transaction_free(ctx); @@ -461,24 +461,7 @@ ret = -1; } - if (ret == 0) { - for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) { - offset = ctx->update_header_offsets[i]; - if (offset != 0) { - cache->hdr_copy.header_offsets[i] = - mail_cache_uint32_to_offset(offset); - cache->hdr_modified = TRUE; - } - } - } - mail_cache_unlock(cache); - - if (ctx->next_unused_header_lowwater == MAIL_CACHE_HEADERS_COUNT) { - /* they're all used - compress the cache to get more */ - cache->need_compress = TRUE; - } - mail_cache_transaction_free(ctx); return ret; } @@ -488,7 +471,6 @@ struct mail_cache *cache = ctx->cache; const uint32_t *buf; size_t size; - unsigned int i; if (mail_cache_lock(cache) > 0) { mail_cache_transaction_free_space(ctx); @@ -510,112 +492,86 @@ mail_cache_unlock(cache); } - /* make sure we don't cache the headers */ - for (i = 0; i < ctx->next_unused_header_lowwater; i++) { - uint32_t offset = cache->hdr->header_offsets[i]; - if (mail_cache_offset_to_uint32(offset) == 
0) - cache->split_offsets[i] = 1; - } - mail_cache_transaction_free(ctx); } -static const char *write_header_string(const char *const headers[], - uint32_t *size_r) -{ - buffer_t *buffer; - size_t size; - - buffer = buffer_create_dynamic(pool_datastack_create(), - 512, (size_t)-1); - - while (*headers != NULL) { - if (buffer_get_used_size(buffer) != 0) - buffer_append(buffer, "\n", 1); - buffer_append(buffer, *headers, strlen(*headers)); - headers++; - } - buffer_append(buffer, null4, 1); - - size = buffer_get_used_size(buffer); - if ((size & 3) != 0) { - buffer_append(buffer, null4, 4 - (size & 3)); - size += 4 - (size & 3); - } - *size_r = size; - return buffer_get_data(buffer, NULL); -} - -int mail_cache_set_header_fields(struct mail_cache_transaction_ctx *ctx, - unsigned int idx, const char *const headers[]) +static int +mail_cache_header_write_fields(struct mail_cache_transaction_ctx *ctx) { struct mail_cache *cache = ctx->cache; - uint32_t offset, size, total_size; - const char *header_str, *prev_str; + buffer_t *buffer; + const void *data; + size_t size; + uint32_t offset, hdr_offset; + int ret = 0; - i_assert(*headers != NULL); - i_assert(idx < MAIL_CACHE_HEADERS_COUNT); - i_assert(idx >= ctx->next_unused_header_lowwater); - i_assert(mail_cache_offset_to_uint32(cache->hdr-> - header_offsets[idx]) == 0); + if (mail_cache_lock(cache) <= 0) + return -1; t_push(); - - header_str = write_header_string(headers, &size); - if (idx != 0) { - prev_str = mail_cache_get_header_fields_str(cache, idx-1); - if (prev_str == NULL) { - t_pop(); - return FALSE; - } + buffer = buffer_create_dynamic(pool_datastack_create(), + 256, (size_t)-1); + mail_cache_header_fields_get(cache, buffer); + data = buffer_get_data(buffer, &size); - i_assert(strcmp(header_str, prev_str) != 0); - } - - total_size = size + sizeof(uint32_t); - offset = mail_cache_transaction_get_space(ctx, total_size, total_size, - NULL, FALSE); - if (offset != 0) { - if (pwrite_full(cache->fd, &size, sizeof(size), 
offset) < 0 || - pwrite_full(cache->fd, header_str, size, - offset + sizeof(uint32_t)) < 0) { + offset = mail_cache_transaction_get_space(ctx, size, size, &size, TRUE); + if (offset == 0) + ret = -1; + else if (pwrite_full(cache->fd, data, size, offset) < 0) { + mail_cache_set_syscall_error(cache, "pwrite_full()"); + ret = -1; + } else if (fdatasync(cache->fd) < 0) { + mail_cache_set_syscall_error(cache, "fdatasync()"); + ret = -1; + } else if (mail_cache_header_fields_get_next_offset(cache, + &hdr_offset) < 0) + ret = -1; + else { + /* after it's guaranteed to be in disk, update header offset */ + offset = mail_cache_uint32_to_offset(offset); + if (pwrite_full(cache->fd, &offset, sizeof(offset), + hdr_offset) < 0) { mail_cache_set_syscall_error(cache, "pwrite_full()"); - offset = 0; + ret = -1; + } else { + /* we'll need to fix mappings. */ + if (mail_cache_header_fields_read(cache) < 0) + ret = -1; } } - - if (offset != 0) { - ctx->update_header_offsets[idx] = offset; - ctx->changes = TRUE; + t_pop(); - /* update cached headers */ - cache->split_offsets[idx] = cache->hdr->header_offsets[idx]; - cache->split_headers[idx] = - mail_cache_split_header(cache, header_str); - - /* make sure get_header_fields() still works for this header - while the transaction isn't yet committed. 
*/ - ctx->next_unused_header_lowwater = idx + 1; - } - - t_pop(); - return offset > 0; + mail_cache_unlock(cache); + return ret; } void mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq, - enum mail_cache_field field, - const void *data, size_t data_size) + unsigned int field, const void *data, size_t data_size) { - uint32_t fixed_size, data_size32; + uint32_t file_field, data_size32; + unsigned int fixed_size; size_t full_size; - i_assert(field < MAIL_CACHE_FIELD_COUNT); - i_assert(data_size > 0); + i_assert(field < ctx->cache->fields_count); i_assert(data_size < (uint32_t)-1); + if (ctx->cache->fields[field].decision == + (MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED)) + return; + + file_field = ctx->cache->field_file_map[field]; + if (file_field == (uint32_t)-1) { + /* we'll have to add this field to headers */ + if (mail_cache_header_write_fields(ctx) < 0) + return; + + file_field = ctx->cache->field_file_map[field]; + i_assert(file_field != (uint32_t)-1); + } + mail_cache_decision_add(ctx->view, seq, field); - fixed_size = mail_cache_field_sizes[field]; + fixed_size = ctx->cache->fields[field].field_size; i_assert(fixed_size == (unsigned int)-1 || fixed_size == data_size); data_size32 = (uint32_t)data_size; @@ -643,7 +599,7 @@ return; } - buffer_append(ctx->cache_data, &field, sizeof(field)); + buffer_append(ctx->cache_data, &file_field, sizeof(file_field)); if (fixed_size == (unsigned int)-1) { buffer_append(ctx->cache_data, &data_size32, sizeof(data_size32)); @@ -654,12 +610,6 @@ buffer_append(ctx->cache_data, null4, 4 - (data_size & 3)); } -int mail_cache_update_record_flags(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_record_flag flags) -{ - return -1; -} - int mail_cache_transaction_lookup(struct mail_cache_transaction_ctx *ctx, uint32_t seq, uint32_t *offset_r) { diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache.c --- a/src/lib-index/mail-cache.c Sun Jul 18 04:44:59 2004 +0300 +++ 
b/src/lib-index/mail-cache.c Sun Jul 18 05:25:06 2004 +0300 @@ -1,37 +1,15 @@ /* Copyright (C) 2003-2004 Timo Sirainen */ #include "lib.h" +#include "buffer.h" +#include "hash.h" #include "file-lock.h" -#include "file-set-size.h" #include "mmap-util.h" #include "write-full.h" #include "mail-cache-private.h" #include -unsigned int mail_cache_field_sizes[32] = { - sizeof(enum mail_cache_record_flag), - sizeof(struct mail_sent_date), - sizeof(time_t), - sizeof(uoff_t), - - /* variable sized */ - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, - (unsigned int)-1, (unsigned int)-1, (unsigned int)-1, (unsigned int)-1 -}; - -enum mail_cache_field mail_cache_header_fields[MAIL_CACHE_HEADERS_COUNT] = { - MAIL_CACHE_HEADERS1, - MAIL_CACHE_HEADERS2, - MAIL_CACHE_HEADERS3, - MAIL_CACHE_HEADERS4 -}; - uint32_t mail_cache_uint32_to_offset(uint32_t offset) { unsigned char buf[4]; @@ -129,6 +107,9 @@ if (mail_cache_map(cache, 0, 0) < 0) return -1; + if (mail_cache_header_fields_read(cache) < 0) + return -1; + if (cache->hdr->file_seq != cache->index->hdr->cache_file_seq) { /* still different - maybe a race condition or maybe the file_seq really is corrupted. 
either way, this shouldn't @@ -240,7 +221,10 @@ return -1; } - return mail_cache_map(cache, 0, sizeof(struct mail_cache_header)); + if (mail_cache_map(cache, 0, sizeof(struct mail_cache_header)) < 0) + return -1; + + return mail_cache_header_fields_read(cache); } struct mail_cache *mail_cache_open_or_create(struct mail_index *index) @@ -250,7 +234,10 @@ cache = i_new(struct mail_cache, 1); cache->index = index; cache->fd = -1; - cache->split_header_pool = pool_alloconly_create("Headers", 512); + cache->field_pool = pool_alloconly_create("Cache fields", 512); + cache->field_name_hash = + hash_create(default_pool, cache->field_pool, 0, + strcase_hash, (hash_cmp_callback_t *)strcasecmp); if (!index->mmap_disable && !index->mmap_no_write) { if (mail_cache_open_and_verify(cache) < 0) { @@ -267,18 +254,13 @@ { mail_cache_file_close(cache); - pool_unref(cache->split_header_pool); + hash_destroy(cache->field_name_hash); + pool_unref(cache->field_pool); + i_free(cache->file_field_map); i_free(cache->filepath); i_free(cache); } -void mail_cache_set_defaults(struct mail_cache *cache, - const enum mail_cache_decision_type dec[32]) -{ - memcpy(cache->default_field_usage_decision_type, dec, - sizeof(cache->default_field_usage_decision_type)); -} - int mail_cache_lock(struct mail_cache *cache) { int i, ret; @@ -367,10 +349,15 @@ view = i_new(struct mail_cache_view, 1); view->cache = cache; view->view = iview; + view->cached_exists_buf = + buffer_create_dynamic(default_pool, + cache->file_fields_count + 10, + (size_t)-1); return view; } void mail_cache_view_close(struct mail_cache_view *view) { + buffer_free(view->cached_exists_buf); i_free(view); } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-index/mail-cache.h --- a/src/lib-index/mail-cache.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-index/mail-cache.h Sun Jul 18 05:25:06 2004 +0300 @@ -5,8 +5,6 @@ #define MAIL_CACHE_FILE_PREFIX ".cache" -#define MAIL_CACHE_HEADERS_COUNT 4 - struct mail_cache; struct mail_cache_view; struct 
mail_cache_transaction_ctx; @@ -23,54 +21,37 @@ MAIL_CACHE_DECISION_FORCED = 0x80 }; -enum mail_cache_record_flag { - /* If binary flags are set, it's not checked whether mail is - missing CRs. So this flag may be set as an optimization for - regular non-binary mails as well if it's known that it contains - valid CR+LF line breaks. */ - MAIL_INDEX_FLAG_BINARY_HEADER = 0x0001, - MAIL_INDEX_FLAG_BINARY_BODY = 0x0002, - - /* Mail header or body is known to contain NUL characters. */ - MAIL_INDEX_FLAG_HAS_NULS = 0x0004, - /* Mail header or body is known to not contain NUL characters. */ - MAIL_INDEX_FLAG_HAS_NO_NULS = 0x0008 +enum mail_cache_field_type { + MAIL_CACHE_FIELD_FIXED_SIZE, + MAIL_CACHE_FIELD_VARIABLE_SIZE, + MAIL_CACHE_FIELD_STRING, + MAIL_CACHE_FIELD_BITMASK, + MAIL_CACHE_FIELD_HEADER }; -/* when modifying, remember to update mail_cache_field_sizes[] too */ -enum mail_cache_field { - /* fixed size fields */ - MAIL_CACHE_INDEX_FLAGS = 0, - MAIL_CACHE_SENT_DATE, - MAIL_CACHE_RECEIVED_DATE, - MAIL_CACHE_VIRTUAL_FULL_SIZE, +struct mail_cache_field { + const char *name; + unsigned int idx; - /* variable sized field */ - MAIL_CACHE_HEADERS1, - MAIL_CACHE_HEADERS2, - MAIL_CACHE_HEADERS3, - MAIL_CACHE_HEADERS4, - MAIL_CACHE_BODY, - MAIL_CACHE_BODYSTRUCTURE, - MAIL_CACHE_ENVELOPE, - MAIL_CACHE_MESSAGEPART, - MAIL_CACHE_UID_STRING, + enum mail_cache_field_type type; + unsigned int field_size; + enum mail_cache_decision_type decision; - MAIL_CACHE_FIELD_COUNT + /* internal: */ + uint32_t uid_highwater; + time_t last_used; }; -struct mail_sent_date { - time_t time; - int32_t timezone; -}; - -extern enum mail_cache_field mail_cache_header_fields[MAIL_CACHE_HEADERS_COUNT]; - struct mail_cache *mail_cache_open_or_create(struct mail_index *index); void mail_cache_free(struct mail_cache *cache); -void mail_cache_set_defaults(struct mail_cache *cache, - const enum mail_cache_decision_type dec[32]); +/* Register fields. fields[].idx is updated to contain field index. 
*/ +void mail_cache_register_fields(struct mail_cache *cache, + struct mail_cache_field *fields, + size_t fields_count); +/* Returns registered field index, or (unsigned int)-1 if not found. */ +unsigned int +mail_cache_register_lookup(struct mail_cache *cache, const char *name); /* Returns TRUE if cache should be compressed. */ int mail_cache_need_compress(struct mail_cache *cache); @@ -86,46 +67,28 @@ mail_cache_get_transaction(struct mail_cache_view *view, struct mail_index_transaction *t); -/* Return NULL-terminated list of headers for given index, or NULL if - header index isn't used. */ -const char *const *mail_cache_get_header_fields(struct mail_cache_view *view, - unsigned int idx); -/* Set list of headers for given index. */ -int mail_cache_set_header_fields(struct mail_cache_transaction_ctx *ctx, - unsigned int idx, const char *const headers[]); - /* Add new field to given record. Updates are not allowed. Fixed size fields must be exactly the expected size. */ void mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq, - enum mail_cache_field field, - const void *data, size_t data_size); + unsigned int field, const void *data, size_t data_size); -/* Retursn TRUE if field exists. */ +/* Returns 1 if field exists, 0 if not, -1 if error. */ int mail_cache_field_exists(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_field field); + unsigned int field); /* Returns current caching decision for given field. */ enum mail_cache_decision_type -mail_cache_field_get_decision(struct mail_cache *cache, - enum mail_cache_field field); +mail_cache_field_get_decision(struct mail_cache *cache, unsigned int field); /* Set data_r and size_r to point to wanted field in cache file. - Returns TRUE if field was found. If field contains multiple fields, - first one found is returned. This is mostly useful for finding headers. */ + Returns 1 if field was found, 0 if not, -1 if error. 
*/ int mail_cache_lookup_field(struct mail_cache_view *view, buffer_t *dest_buf, - uint32_t seq, enum mail_cache_field field); - -/* Return string field. */ -int mail_cache_lookup_string_field(struct mail_cache_view *view, string_t *dest, - uint32_t seq, enum mail_cache_field field); + uint32_t seq, unsigned int field); -/* Return record flags. */ -enum mail_cache_record_flag -mail_cache_get_record_flags(struct mail_cache_view *view, uint32_t seq); - -/* Update record flags. The cache file must be locked and the flags must be - already inserted to the record. */ -int mail_cache_update_record_flags(struct mail_cache_view *view, uint32_t seq, - enum mail_cache_record_flag flags); +/* Return specified cached headers. Returns 1 if all fields were found, + 0 if not, -1 if error. dest is updated only if all fields were found. */ +int mail_cache_lookup_headers(struct mail_cache_view *view, string_t *dest, + uint32_t seq, unsigned int fields[], + size_t fields_count); /* "Error in index cache file %s: ...". */ void mail_cache_set_corrupted(struct mail_cache *cache, const char *fmt, ...) 
diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-mail/istream-header-filter.c --- a/src/lib-mail/istream-header-filter.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-mail/istream-header-filter.c Sun Jul 18 05:25:06 2004 +0300 @@ -112,14 +112,16 @@ } } -static void read_and_hide_headers(struct istream *input, - const char *const *headers, - size_t headers_count, buffer_t *dest, - struct message_size *hdr_size) +static void +read_and_hide_headers(struct istream *input, int filter, + const char *const *headers, size_t headers_count, + buffer_t *dest, struct message_size *hdr_size, + header_filter_callback *callback, void *context) { struct message_header_parser_ctx *hdr_ctx; struct message_header_line *hdr; uoff_t virtual_size = 0; + int matched; hdr_ctx = message_parse_header_init(input, hdr_size, FALSE); while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) { @@ -131,8 +133,12 @@ break; } - if (bsearch(hdr->name, headers, headers_count, - sizeof(*headers), bsearch_strcasecmp) != NULL) { + matched = bsearch(hdr->name, headers, headers_count, + sizeof(*headers), bsearch_strcasecmp) != NULL; + if (callback != NULL) + callback(hdr, matched, context); + + if (matched == filter) { /* ignore */ } else if (dest != NULL) { if (!hdr->continued) { @@ -157,8 +163,9 @@ } struct istream * -i_stream_create_header_filter(pool_t pool, struct istream *input, - const char *const *headers, size_t headers_count) +i_stream_create_header_filter(pool_t pool, struct istream *input, int filter, + const char *const *headers, size_t headers_count, + header_filter_callback *callback, void *context) { struct header_filter_istream *mstream; @@ -168,8 +175,11 @@ mstream->headers = buffer_create_dynamic(default_pool, 8192, (size_t)-1); - read_and_hide_headers(input, headers, headers_count, mstream->headers, - &mstream->header_size); + read_and_hide_headers(input, filter, headers, headers_count, + mstream->headers, &mstream->header_size, + callback, context); + if (callback != NULL) + 
callback(NULL, FALSE, context); mstream->istream.buffer = buffer_get_data(mstream->headers, NULL); mstream->istream.pos = mstream->header_size.virtual_size; diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-mail/istream-header-filter.h --- a/src/lib-mail/istream-header-filter.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-mail/istream-header-filter.h Sun Jul 18 05:25:06 2004 +0300 @@ -1,9 +1,14 @@ #ifndef __ISTREAM_HEADER_FILTER_H #define __ISTREAM_HEADER_FILTER_H -/* NOTE: NULL-terminated headers list must be sorted. */ +typedef void header_filter_callback(struct message_header_line *hdr, + int matched, void *context); + +/* NOTE: headers list must be sorted. If filter is TRUE, given headers are + removed from output, otherwise only given headers are included in output. */ struct istream * -i_stream_create_header_filter(pool_t pool, struct istream *input, - const char *const *headers, size_t headers_count); +i_stream_create_header_filter(pool_t pool, struct istream *input, int filter, + const char *const *headers, size_t headers_count, + header_filter_callback *callback, void *context); #endif diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-mail-headers.c --- a/src/lib-storage/index/index-mail-headers.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-mail-headers.c Sun Jul 18 05:25:06 2004 +0300 @@ -1,45 +1,12 @@ /* Copyright (C) 2003 Timo Sirainen */ -/* - Headers are stored in 1-4 pieces. There's a list of header names that each - piece contains, so if piece doesn't actually contain some listed header, - it's known not to exist in the mail at all. - - Header name lists are stored in sorted order, so we can use binary - searching. - - We have to be able to do 3 things: - - Get value for one header - - Get a list of headers, possibly containing more than requested - - Save some of the uncached headers into cache - - First is easy. Second means that we have to store the wanted headers in - a single string which we can directly return. 
- - Third is a bit tricky if we want to avoid parsing and copying the data - uselessly. It's possible if we want to cache all requested uncached - headers. That should be the common case, so I'll optimize for that. - Another even more common case is that everything is already cached. So: - - - If we request only cached headers, parse them and copy only wanted - headers to header_data. - - If we request a non-cached header, trash the header_data and all - pointers to it. Copy all cached headers to beginning if it and save - a marker where it ends. - - If we again request single cached header, we'll have to parse the - header_data up to the marker again. - - When saving the uncached headers, we know that they all come after the - marker. If we want to save them all, it's directly there in a string. - Otherwise we have to parse them and copy the wanted headers, but it's - still less work. -*/ - #include "lib.h" #include "istream.h" #include "buffer.h" #include "str.h" #include "message-date.h" #include "message-parser.h" +#include "istream-header-filter.h" #include "imap-envelope.h" #include "imap-bodystructure.h" #include "index-storage.h" @@ -47,241 +14,148 @@ #include -struct cached_header { - const char *name; - size_t value_idx; /* in header_data */ +struct index_header_lookup_ctx { + struct mailbox_header_lookup_ctx ctx; + pool_t pool; - unsigned int parsing:1; - unsigned int fully_saved:1; + size_t count; + unsigned int *idx; + const char **name; }; -static struct cached_header * -cached_header_find(struct index_mail *mail, const char *name, - unsigned int *idx_r) +static int header_line_cmp(const void *p1, const void *p2) +{ + const struct index_mail_line *l1 = p1, *l2 = p2; + int diff; + + diff = (int)l1->field_idx - (int)l2->field_idx; + return diff != 0 ? 
diff : + (int)l1->line_num - (int)l2->line_num; +} + +static void index_mail_parse_header_finish(struct index_mail *mail) { - struct cached_header **data; - size_t size; - unsigned int idx, left_idx, right_idx; - int ret; + static uint32_t null = 0; + struct index_mail_line *lines; + const unsigned char *header, *data; + const uint8_t *match; + buffer_t *buf; + size_t i, j, size, data_size, match_idx, match_size; + int noncontiguous; + + t_push(); + + lines = buffer_get_modifyable_data(mail->header_lines, &size); + size /= sizeof(*lines); + + /* sort it first so fields are grouped together and ordered by + line number */ + qsort(lines, size, sizeof(*lines), header_line_cmp); + + match = buffer_get_data(mail->header_match, &match_size); + header = buffer_get_data(mail->header_data, NULL); + buf = buffer_create_dynamic(pool_datastack_create(), 256, (size_t)-1); - data = buffer_get_modifyable_data(mail->data.headers, &size); + for (i = match_idx = 0; i < size; i = j) { + while (match_idx < lines[i].field_idx && + match_idx < match_size) { + if (match[match_idx] == mail->header_match_value) { + /* this header doesn't exist. remember that. 
*/ + mail_cache_add(mail->trans->cache_trans, + mail->data.seq, match_idx, + NULL, 0); + } + match_idx++; + } + match_idx++; + + buffer_set_used_size(buf, 0); + buffer_append(buf, &lines[i].line_num, + sizeof(lines[i].line_num)); + + noncontiguous = FALSE; + for (j = i+1; j < size; j++) { + if (lines[j].field_idx != lines[i].field_idx) + break; - idx = left_idx = 0; - right_idx = size / sizeof(struct cached_header *); + if (lines[j].start_pos != lines[j-1].end_pos) + noncontiguous = TRUE; + buffer_append(buf, &lines[j].line_num, + sizeof(lines[j].line_num)); + } + buffer_append(buf, &null, sizeof(uint32_t)); - while (left_idx < right_idx) { - idx = (left_idx + right_idx) / 2; + if (noncontiguous) { + for (; i < j; i++) { + buffer_append(buf, header + lines[i].start_pos, + lines[i].end_pos - + lines[i].start_pos); + } + } else { + buffer_append(buf, header + lines[i].start_pos, + lines[j-1].end_pos - lines[i].start_pos); + } + + data = buffer_get_data(buf, &data_size); + mail_cache_add(mail->trans->cache_trans, mail->data.seq, + lines[i].field_idx, data, data_size); + } + + t_pop(); +} - ret = strcasecmp(data[idx]->name, name); - if (ret < 0) - left_idx = ++idx; - else if (ret > 0) - right_idx = idx; - else { - if (idx_r != NULL) - *idx_r = idx; - return data[idx]; +void index_mail_parse_header_init(struct index_mail *mail, + struct mailbox_header_lookup_ctx *_headers) +{ + struct index_header_lookup_ctx *headers = + (struct index_header_lookup_ctx *)_headers; + size_t i; + + if (mail->header_data == NULL) { + mail->header_data = + buffer_create_dynamic(default_pool, 4096, (size_t)-1); + mail->header_lines = + buffer_create_dynamic(default_pool, 256, (size_t)-1); + mail->header_match = + buffer_create_dynamic(default_pool, 64, (size_t)-1); + } else { + buffer_set_used_size(mail->header_data, 0); + buffer_set_used_size(mail->header_lines, 0); + } + + if (++mail->header_match_value == 0) { + /* wrapped, we'll have to clear the buffer */ + 
memset(buffer_get_modifyable_data(mail->header_match, NULL), 0, + buffer_get_size(mail->header_match)); + mail->header_match_value++; + } + + if (headers != NULL) { + for (i = 0; i < headers->count; i++) { + buffer_write(mail->header_match, headers->idx[i], + &mail->header_match_value, 1); } } - if (idx_r != NULL) - *idx_r = idx; - return NULL; -} - -static struct cached_header * -cached_header_add(struct index_mail *mail, const char *name) -{ - struct cached_header *hdr; - unsigned int idx; - - i_assert(*name != '\0'); - - hdr = cached_header_find(mail, name, &idx); - if (hdr != NULL) - return hdr; - - hdr = p_new(mail->pool, struct cached_header, 1); - hdr->name = p_strdup(mail->pool, name); - - buffer_insert(mail->data.headers, idx * sizeof(hdr), &hdr, sizeof(hdr)); - return hdr; -} - -static int strcasecmp_p(const void *p1, const void *p2) -{ - char *const *s1 = p1, *const *s2 = p2; - - return strcasecmp(*s1, *s2); -} - -static const char *const *sort_array(const char *const *arr) -{ - static const char *null = NULL; - buffer_t *buffer; - const char **data; - int i, already_sorted; - - /* copy the wanted_headers array */ - buffer = buffer_create_dynamic(pool_datastack_create(), - 256, (size_t)-1); - already_sorted = TRUE; - for (i = 0; arr[i] != NULL; i++) { - if (i > 0 && already_sorted && - strcasecmp(arr[i], arr[i-1]) <= 0) - already_sorted = FALSE; - buffer_append(buffer, &arr[i], sizeof(const char *)); - } - buffer_append(buffer, &null, sizeof(const char *)); - - /* and sort it */ - data = buffer_get_modifyable_data(buffer, NULL); - if (!already_sorted) - qsort(data, i, sizeof(const char *), strcasecmp_p); - return data; -} - -static int find_wanted_headers(struct mail_cache_view *cache_view, - const char *const wanted_headers[]) -{ - const char *const *headers, *const *tmp; - int i, ret, cmp; - - if (wanted_headers == NULL || *wanted_headers == NULL) - return -1; - - t_push(); - wanted_headers = sort_array(wanted_headers); - - ret = -1; - for (i = 
MAIL_CACHE_HEADERS_COUNT-1; i >= 0; i--) { - headers = mail_cache_get_header_fields(cache_view, i); - if (headers == NULL) - continue; - - for (tmp = wanted_headers; *headers != NULL; headers++) { - cmp = strcasecmp(*tmp, *headers); - if (cmp == 0) { - if (*++tmp == NULL) - break; - } else { - if (cmp < 0) - break; - } - } - - if (*tmp != NULL) - break; - - /* find the minimum matching header number */ - ret = i; - } - t_pop(); - - return ret; -} - -static int mail_find_wanted_headers(struct index_mail *mail, - const char *const wanted_headers[]) -{ - int idx; - - idx = find_wanted_headers(mail->trans->cache_view, wanted_headers); - if (idx < 0) - return -1; - - for (; idx < MAIL_CACHE_HEADERS_COUNT; idx++) { - if (mail_cache_field_exists(mail->trans->cache_view, - mail->data.seq, - mail_cache_header_fields[idx]) > 0) - return idx; - } - - return -1; -} - -static const char *const *cached_header_get_names(struct index_mail *mail) -{ - const struct cached_header **data; - const char *null = NULL; - buffer_t *buffer; - size_t i, size; - - data = buffer_get_modifyable_data(mail->data.headers, &size); - size /= sizeof(struct cached_header *); - - buffer = buffer_create_dynamic(pool_datastack_create(), - 128, (size_t)-1); - for (i = 0; i < size; i++) - buffer_append(buffer, &data[i]->name, sizeof(const char *)); - buffer_append(buffer, &null, sizeof(const char *)); - - return buffer_get_data(buffer, NULL); -} - -static void cached_headers_mark_fully_saved(struct index_mail *mail) -{ - struct cached_header **data; - size_t i, size; - - data = buffer_get_modifyable_data(mail->data.headers, &size); - size /= sizeof(struct cached_header *); - - for (i = 0; i < size; i++) { - if (data[i]->parsing) { - data[i]->parsing = FALSE; - data[i]->fully_saved = TRUE; + if (mail->wanted_headers != NULL && mail->wanted_headers != headers) { + headers = mail->wanted_headers; + for (i = 0; i < headers->count; i++) { + buffer_write(mail->header_match, headers->idx[i], + 
&mail->header_match_value, 1); } } } -void index_mail_parse_header_init(struct index_mail *mail, - const char *const headers[]) +static void index_mail_parse_finish_imap_envelope(struct index_mail *mail) { - struct cached_header **data; - size_t i, size; - int cmp; - - if (mail->data.header_data == NULL) - mail->data.header_data = str_new(mail->pool, 4096); - - data = buffer_get_modifyable_data(mail->data.headers, &size); - size /= sizeof(struct cached_header *); + string_t *str; - mail->data.parsing_count = 0; - if (headers == NULL) { - /* parsing all headers */ - for (i = 0; i < size; i++) { - if (!data[i]->fully_saved) { - data[i]->parsing = TRUE; - mail->data.parsing_count++; - } - } - } else { - t_push(); - headers = sort_array(headers); - for (i = 0; i < size && *headers != NULL;) { - cmp = strcasecmp(*headers, data[i]->name); - if (cmp <= 0) { - if (cmp == 0) { - if (!data[i]->fully_saved) { - data[i]->parsing = TRUE; - mail->data.parsing_count++; - } - i++; - } - headers++; - } else { - i++; - } - } - t_pop(); - } + str = str_new(mail->pool, 256); + imap_envelope_write_part_data(mail->data.envelope_data, str); + mail->data.envelope = str_c(str); - if (mail->data.save_sent_date || mail->data.save_envelope) { - /* parse the whole header */ - mail->data.parsing_count = -1; - } + mail_cache_add(mail->trans->cache_trans, mail->data.seq, + MAIL_CACHE_ENVELOPE, str_data(str), str_len(str)); } int index_mail_parse_header(struct message_part *part, @@ -289,27 +163,24 @@ struct index_mail *mail) { struct index_mail_data *data = &mail->data; - struct cached_header *cached_hdr; + enum mail_cache_decision_type decision; + const char *cache_field_name; + unsigned int field_idx; int timezone; - if (data->bodystructure_header_parse) - imap_bodystructure_parse_header(mail->pool, part, hdr); + data->parse_line_num++; - if (part != NULL && part->parent != NULL) - return FALSE; + if (data->save_bodystructure_header) { + i_assert(part != NULL); + 
imap_bodystructure_parse_header(mail->pool, part, hdr); + } if (data->save_envelope) { imap_envelope_parse_header(mail->pool, &data->envelope_data, hdr); - if (hdr == NULL) { - /* finalize the envelope */ - string_t *str; - - str = str_new(mail->pool, 256); - imap_envelope_write_part_data(data->envelope_data, str); - data->envelope = str_c(str); - } + if (hdr == NULL) + index_mail_parse_finish_imap_envelope(mail); } if (hdr == NULL) { @@ -325,8 +196,8 @@ MAIL_CACHE_SENT_DATE, &data->sent_date, sizeof(data->sent_date)); } - - cached_headers_mark_fully_saved(mail); + index_mail_parse_header_finish(mail); + data->save_bodystructure_header = FALSE; return TRUE; } @@ -347,444 +218,319 @@ } } - cached_hdr = cached_header_find(mail, hdr->name, NULL); - if (cached_hdr != NULL && !cached_hdr->fully_saved) { - if (data->header_stream == NULL) { - if (!hdr->continued) { - str_append(data->header_data, hdr->name); - str_append(data->header_data, ": "); - } - if (cached_hdr->value_idx == 0) { - cached_hdr->value_idx = - str_len(data->header_data); - } - str_append_n(data->header_data, - hdr->value, hdr->value_len); - if (!hdr->no_newline) - str_append(data->header_data, "\n"); - } else { - /* it's already in header_data. it means it's fully - cached and we don't have to worry about other than - the first header line. 
*/ - i_assert(cached_hdr->value_idx == 0); - cached_hdr->value_idx = data->header_stream->v_offset; + if (!hdr->continued) { + t_push(); + cache_field_name = t_strconcat("hdr.", hdr->name, NULL); + data->parse_line.field_idx = + mail_cache_register_lookup(mail->ibox->cache, + cache_field_name); + t_pop(); + } + field_idx = data->parse_line.field_idx; + + if (field_idx == (unsigned int)-1) { + /* we don't want this field */ + return TRUE; + } - cached_hdr->fully_saved = TRUE; - if (--data->parsing_count == 0) - return FALSE; + if (!hdr->continued) { + decision = mail_cache_field_get_decision(mail->ibox->cache, + field_idx); + data->parse_line.cache = + (decision & ~MAIL_CACHE_DECISION_FORCED) != + MAIL_CACHE_DECISION_NO; + if (data->parse_line.cache && + mail_cache_field_exists(mail->trans->cache_view, + data->seq, field_idx) > 0) { + /* already cached */ + data->parse_line.cache = FALSE; } } + + if (!data->parse_line.cache) { + const uint8_t *match; + size_t size; + + match = buffer_get_data(mail->header_match, &size); + if (field_idx >= size || + match[field_idx] != mail->header_match_value) { + /* we don't need to do anything with this header */ + return TRUE; + } + } + + if (!hdr->continued) { + data->parse_line.start_pos = str_len(mail->header_data); + data->parse_line.line_num = data->parse_line_num; + str_append(mail->header_data, hdr->name); + str_append(mail->header_data, ": "); + } + str_append_n(mail->header_data, hdr->value, hdr->value_len); + if (!hdr->no_newline) + str_append(mail->header_data, "\n"); + if (!hdr->continues) { + data->parse_line.end_pos = str_len(mail->header_data); + buffer_append(mail->header_lines, &data->parse_line, + sizeof(data->parse_line)); + } return TRUE; } -static void index_mail_parse_header_cb(struct message_part *part, - struct message_header_line *hdr, - void *context) +static void +index_mail_parse_header_cb(struct message_part *part, + struct message_header_line *hdr, void *context) { struct index_mail *mail = context; 
(void)index_mail_parse_header(part, hdr, mail); } -static int index_mail_can_cache_headers(struct index_mail *mail) -{ - enum mail_cache_field field; - - field = mail_cache_header_fields[MAIL_CACHE_HEADERS_COUNT-1]; - if (mail_cache_field_exists(mail->trans->cache_view, mail->data.seq, - field) != 0) - return FALSE; /* all headers used */ - - /* FIXME: add some smart checks here. we don't necessarily want to - cache everything.. */ - return TRUE; -} - -static void cached_headers_clear_values(struct index_mail *mail) -{ - struct cached_header **data; - size_t i, size, clear_offset; - - clear_offset = str_len(mail->data.header_data); - data = buffer_get_modifyable_data(mail->data.headers, &size); - size /= sizeof(struct cached_header *); - - for (i = 0; i < size; i++) { - if (data[i]->value_idx >= clear_offset) - data[i]->value_idx = 0; - } -} - -static int parse_cached_headers(struct index_mail *mail, int idx) -{ - struct index_mail_data *data = &mail->data; - struct message_header_parser_ctx *hdr_ctx; - struct message_header_line *hdr; - struct istream *istream; - const char *const *idx_headers; - string_t *str; - - if (idx < data->header_data_cached) { - /* it's already in header_data. */ - istream = i_stream_create_from_data(mail->pool, - str_data(data->header_data), - str_len(data->header_data)); - /* we might be parsing a bit more.. 
*/ - idx = data->header_data_cached-1; - data->header_stream = istream; - } else { - str = str_new(mail->pool, 32); - if (!mail_cache_lookup_string_field( - mail->trans->cache_view, str, data->seq, - mail_cache_header_fields[idx])) { - /* broken - we expected the header to exist */ - return FALSE; - } - - data->header_data_cached_partial = TRUE; - istream = i_stream_create_from_data(mail->pool, str_data(str), - str_len(str)); - } - - idx_headers = mail_cache_get_header_fields(mail->trans->cache_view, - idx); - if (idx_headers == NULL) { - mail_cache_set_corrupted(mail->ibox->cache, - "Headers %d names not found", idx); - return FALSE; - } - - index_mail_parse_header_init(mail, idx_headers); - - hdr_ctx = message_parse_header_init(istream, NULL, TRUE); - while ((hdr = message_parse_header_next(hdr_ctx)) != NULL) { - if (!index_mail_parse_header(NULL, hdr, mail)) - break; - } - message_parse_header_deinit(hdr_ctx); - index_mail_parse_header(NULL, NULL, mail); - - data->header_stream = NULL; - i_stream_unref(istream); - - return TRUE; -} - -static void trash_partial_headers(struct index_mail *mail) -{ - struct index_mail_data *data = &mail->data; - - data->header_data_cached_partial = FALSE; - data->header_data_cached = data->header_data_cached_contiguous; - - str_truncate(data->header_data, data->header_data_uncached_offset); - cached_headers_clear_values(mail); -} - int index_mail_parse_headers(struct index_mail *mail) { struct index_mail_data *data = &mail->data; - const char *const *headers; - int idx, max; if (data->stream == NULL) { if (mail->mail.get_stream(&mail->mail, NULL, NULL) == NULL) return FALSE; } - if (mail->data.header_data == NULL) - mail->data.header_data = str_new(mail->pool, 4096); - - if (!data->header_fully_parsed && index_mail_can_cache_headers(mail)) { - if (data->header_data_cached_partial) { - /* too difficult to handle efficiently, trash it */ - trash_partial_headers(mail); - } - - /* add all cached headers to beginning of header_data */ - 
idx = data->header_data_cached; max = idx-1; - for (; idx < MAIL_CACHE_HEADERS_COUNT; idx++) { - if (!mail_cache_lookup_string_field( - mail->trans->cache_view, mail->data.header_data, - mail->data.seq, mail_cache_header_fields[idx])) - continue; - - max = idx; - } - data->header_data_cached = max+1; - data->header_data_uncached_offset = - str_len(mail->data.header_data); - - /* make sure we cache everything */ - for (idx = MAIL_CACHE_HEADERS_COUNT-1; idx >= 0; idx--) { - headers = mail_cache_get_header_fields( - mail->trans->cache_view, idx); - if (headers != NULL) - break; - } - - if (headers != NULL) { - while (*headers != NULL) { - cached_header_add(mail, *headers); - headers++; - } - } - - if (max >= 0) { - /* now we'll have to set value_idx for all headers that - are already cached */ - if (!parse_cached_headers(mail, max)) { - /* FIXME: handle better */ - return FALSE; - } - } - - /* it's possible that we're parsing headers without wanting - to save any of them */ - if (buffer_get_used_size(data->headers) != 0) { - data->header_save = TRUE; - data->header_save_idx = idx; - } - } - - data->bodystructure_header_parse = data->bodystructure_header_want; index_mail_parse_header_init(mail, NULL); - if (data->parts != NULL || data->parser_ctx != NULL) { - message_parse_header(data->parts, data->stream, &data->hdr_size, - index_mail_parse_header_cb, mail); - } else { + if (data->parts == NULL && data->parser_ctx == NULL) { + /* initialize bodystructure parsing in case we read the whole + message. 
*/ data->parser_ctx = message_parser_init(mail->pool, data->stream); message_parser_parse_header(data->parser_ctx, &data->hdr_size, index_mail_parse_header_cb, mail); + } else { + /* just read the header */ + message_parse_header(data->parts, data->stream, &data->hdr_size, + index_mail_parse_header_cb, mail); } data->hdr_size_set = TRUE; - - if (data->bodystructure_header_want) { - data->bodystructure_header_want = FALSE; - data->bodystructure_header_parse = FALSE; - data->bodystructure_header_parsed = TRUE; - } - data->parse_header = FALSE; - data->header_fully_parsed = TRUE; return TRUE; } +static void +imap_envelope_parse_callback(struct message_part *part __attr_unused__, + struct message_header_line *hdr, void *context) +{ + struct index_mail *mail = context; + + imap_envelope_parse_header(mail->pool, &mail->data.envelope_data, hdr); + + if (hdr == NULL) + index_mail_parse_finish_imap_envelope(mail); +} + +void index_mail_headers_get_envelope(struct index_mail *mail) +{ + struct mailbox_header_lookup_ctx *header_ctx; + struct istream *stream; + + header_ctx = mailbox_header_lookup_init(&mail->ibox->box, + imap_envelope_headers); + stream = mail->mail.get_headers(&mail->mail, header_ctx); + if (mail->data.envelope == NULL) { + /* we got the headers from cache - parse them to get the + envelope */ + message_parse_header(NULL, stream, NULL, + imap_envelope_parse_callback, mail); + mail->data.save_envelope = FALSE; + } + mailbox_header_lookup_deinit(header_ctx); +} + +static unsigned int +get_header_field_idx(struct index_mailbox *ibox, const char *field) +{ + struct mail_cache_field header_field = { + NULL, 0, MAIL_CACHE_FIELD_HEADER, 0, + MAIL_CACHE_DECISION_TEMP, 0, 0 + }; + const char *cache_field_name; + unsigned int field_idx; + + t_push(); + cache_field_name = t_strconcat("hdr.", field, NULL); + field_idx = mail_cache_register_lookup(ibox->cache, cache_field_name); + if (field_idx == (unsigned int)-1) { + header_field.name = cache_field_name; + 
mail_cache_register_fields(ibox->cache, &header_field, 1); + field_idx = header_field.idx; + } + t_pop(); + return field_idx; +} + const char *index_mail_get_header(struct mail *_mail, const char *field) { - struct index_mail *mail = (struct index_mail *) _mail; - struct cached_header *hdr; - const unsigned char *start, *end, *p; - const char *arr[2]; - int idx; + struct index_mail *mail = (struct index_mail *)_mail; + const unsigned char *data; + unsigned int field_idx; + string_t *dest; + size_t i, len, value_pos; + int ret; - hdr = cached_header_add(mail, field); - if (!hdr->fully_saved) { - if (mail->data.parse_header) { - /* we need to parse header anyway */ - idx = -1; - } else { - arr[0] = field; arr[1] = NULL; - idx = mail_find_wanted_headers(mail, arr); + field_idx = get_header_field_idx(mail->ibox, field); - if (idx >= 0) { - if (!parse_cached_headers(mail, idx)) { - /* broken cache, parse again */ - idx = -1; - } - } - } - - if (idx < 0) { - if (!index_mail_parse_headers(mail)) - return NULL; + dest = str_new(mail->pool, 128); + if (mail_cache_lookup_headers(mail->trans->cache_view, dest, + mail->data.seq, &field_idx, 1) <= 0) { + /* not in cache / error */ + if (index_mail_parse_headers(mail) < 0) + return NULL; - /* might have been moved in memory, get it again */ - hdr = cached_header_find(mail, field, NULL); - } - } - - if (hdr->value_idx == 0) - return NULL; - - start = str_data(mail->data.header_data); - end = start + str_len(mail->data.header_data); - start += hdr->value_idx; - for (p = start; p != end; p++) { - if (*p == '\n') { - if (p+1 == end || (p[1] != ' ' && p[1] != '\t')) - break; - } + ret = mail_cache_lookup_headers(mail->trans->cache_view, dest, + mail->data.seq, &field_idx, 1); + i_assert(ret != 0); + if (ret < 0) + return NULL; } - return t_strdup_until(start, p); -} - -struct istream *index_mail_get_headers(struct mail *_mail, - const char *const minimum_fields[]) -{ - struct index_mail *mail = (struct index_mail *) _mail; - struct 
index_mail_data *data = &mail->data; - struct cached_header *hdr; - const char *const *tmp; - int i, idx, all_saved; - - i_assert(*minimum_fields != NULL); - - if (data->header_data == NULL) - data->header_data = str_new(mail->pool, 4096); + /* cached. skip "header name: " in dest. */ + data = str_data(dest); + len = str_len(dest); + for (i = 0; i < len; i++) { + if (data[i] == ':') { + if (i+1 != len && data[++i] == ' ') i++; + break; + } + } + value_pos = i; - idx = mail_find_wanted_headers(mail, minimum_fields); - if (idx >= 0) { - /* copy from cache to header_data */ - if (data->header_data_cached_partial) { - /* Some headers may already partially be in - header_data, we don't want them twice */ - trash_partial_headers(mail); - } - for (i = data->header_data_cached; i <= idx; i++) { - (void)mail_cache_lookup_string_field( - mail->trans->cache_view, data->header_data, - data->seq, mail_cache_header_fields[i]); - } - data->header_data_cached = idx+1; - data->header_data_uncached_offset = str_len(data->header_data); - data->header_data_cached_contiguous = idx+1; - } else { - /* it's not cached yet - see if we have them parsed */ - all_saved = TRUE; - for (tmp = minimum_fields; *tmp != NULL; tmp++) { - hdr = cached_header_add(mail, *tmp); - if (!hdr->fully_saved) - all_saved = FALSE; - } - - if (!all_saved) { - if (!index_mail_parse_headers(mail)) - return NULL; + /* return only the first field in case there's multiple. 
*/ + for (; i < len; i++) { + if (data[i] == '\n') { + if (i+1 == len || + (data[i+1] != ' ' && data[i+1] != '\t')) { + buffer_set_used_size(dest, i); + break; + } } } - return i_stream_create_from_data(mail->pool, - str_data(data->header_data), - str_len(data->header_data)); + return str_c(dest) + value_pos; +} + +static void header_cache_callback(struct message_header_line *hdr, + int matched __attr_unused__, void *context) +{ + struct index_mail *mail = context; + + (void)index_mail_parse_header(NULL, hdr, mail); } -void index_mail_headers_init(struct index_mail *mail) +struct istream * +index_mail_get_headers(struct mail *_mail, + struct mailbox_header_lookup_ctx *_headers) { - struct mail_cache_view *cache_view = mail->trans->cache_view; - int idx = -2, idx2 = -2; + struct index_mail *mail = (struct index_mail *)_mail; + struct index_header_lookup_ctx *headers = + (struct index_header_lookup_ctx *)_headers; + string_t *dest; + + dest = str_new(mail->pool, 256); + if (mail_cache_lookup_headers(mail->trans->cache_view, dest, + mail->data.seq, headers->idx, + headers->count) > 0) { + return i_stream_create_from_data(mail->pool, + str_data(dest), str_len(dest)); + } + /* not in cache / error */ + p_free(mail->pool, dest); - if (mail->wanted_headers != NULL && *mail->wanted_headers != NULL) - idx = find_wanted_headers(cache_view, mail->wanted_headers); + if (mail->data.stream == NULL) { + if (mail->mail.get_stream(&mail->mail, NULL, NULL) == NULL) + return FALSE; + } + + if (mail->data.filter_stream != NULL) + i_stream_unref(mail->data.filter_stream); - if (idx != -1 && (mail->wanted_fields & MAIL_FETCH_IMAP_ENVELOPE)) - idx2 = find_wanted_headers(cache_view, imap_envelope_headers); + index_mail_parse_header_init(mail, _headers); + mail->data.filter_stream = + i_stream_create_header_filter(mail->pool, mail->data.stream, + FALSE, + headers->name, headers->count, + header_cache_callback, mail); + return mail->data.filter_stream; +} - mail->wanted_headers_idx = idx == 
-1 || idx2 == -1 ? -1 : - idx > idx2 ? idx : idx2; +static int strcasecmp_p(const void *p1, const void *p2) +{ + const char *const *s1 = p1, *const *s2 = p2; + + return strcasecmp(*s1, *s2); } -void index_mail_headers_init_next(struct index_mail *mail) +struct mailbox_header_lookup_ctx * +index_header_lookup_init(struct mailbox *box, const char *const headers[]) { - struct index_mail_data *data = &mail->data; - const char *const *tmp; - int idx; + struct index_mailbox *ibox = (struct index_mailbox *)box; + struct mail_cache_field *fields, header_field = { + NULL, 0, MAIL_CACHE_FIELD_HEADER, 0, + MAIL_CACHE_DECISION_TEMP, 0, 0 + }; + struct index_header_lookup_ctx *ctx; + const char *const *name; + const char **sorted_headers; + buffer_t *buf; + pool_t pool; + size_t i, size; - mail->data.headers = buffer_create_dynamic(mail->pool, 64, (size_t)-1); + for (size = 0, name = headers; *name != NULL; name++) + size++; + + t_push(); - idx = mail->wanted_headers_idx; - if (mail->wanted_headers != NULL) { - const char *const *tmp; - - for (tmp = mail->wanted_headers; *tmp != NULL; tmp++) - cached_header_add(mail, *tmp); + if (size > 0) { + /* headers need to be sorted for filter stream. 
*/ + sorted_headers = t_new(const char *, size); + memcpy(sorted_headers, headers, size * sizeof(*sorted_headers)); + qsort(sorted_headers, size, sizeof(*sorted_headers), + strcasecmp_p); + headers = sorted_headers; } - if (mail->wanted_fields & MAIL_FETCH_IMAP_ENVELOPE) { - for (tmp = imap_envelope_headers; *tmp != NULL; tmp++) - cached_header_add(mail, *tmp); - } else if ((mail->wanted_fields & MAIL_FETCH_DATE) && - data->sent_date.time == (time_t)-1) { - cached_header_add(mail, "Date"); - if (idx != -1) { - /* see if it's cached */ - const char *headers[] = { "Date", NULL }; - idx = mail_find_wanted_headers(mail, headers); + buf = buffer_create_dynamic(pool_datastack_create(), 128, (size_t)-1); + for (i = 0; i < size; i++) { + header_field.name = t_strconcat("hdr.", headers[i], NULL); + buffer_append(buf, &header_field, sizeof(header_field)); + } + + fields = buffer_get_modifyable_data(buf, &size); + size /= sizeof(*fields); + mail_cache_register_fields(ibox->cache, fields, size); + + pool = pool_alloconly_create("index_header_lookup_ctx", 256); + ctx = p_new(pool, struct index_header_lookup_ctx, 1); + ctx->ctx.box = box; + ctx->pool = pool; + ctx->count = size; + + if (size > 0) { + ctx->idx = p_new(pool, unsigned int, size); + ctx->name = p_new(pool, const char *, size); + + /* @UNSAFE */ + for (i = 0; i < size; i++) { + ctx->idx[i] = fields[i].idx; + ctx->name[i] = p_strdup(pool, sorted_headers[i]); } } - /* See if we're going to have to parse the header */ - if (idx != -2) { - if (idx >= 0) { - for (; idx < MAIL_CACHE_HEADERS_COUNT; idx++) { - if (mail_cache_field_exists( - mail->trans->cache_view, - data->seq, - mail_cache_header_fields[idx]) > 0) - break; - } - } - if (idx < 0 || idx >= MAIL_CACHE_HEADERS_COUNT) - data->parse_header = TRUE; - } -} - -static int find_unused_header_idx(struct mail_cache_view *cache_view) -{ - int i; - - for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) { - if (mail_cache_get_header_fields(cache_view, i) == NULL) - return i; - } 
- - return -1; + t_pop(); + return &ctx->ctx; } -void index_mail_headers_close(struct index_mail *mail) +void index_header_lookup_deinit(struct mailbox_header_lookup_ctx *_ctx) { - struct index_mail_data *data = &mail->data; - const char *str, *const *headers; - size_t len; - int idx; - - if (!data->header_save) - return; + struct index_header_lookup_ctx *ctx = + (struct index_header_lookup_ctx *)_ctx; - /* FIXME: this breaks if fetch_uid() and fetch/search are both - accessing headers from same message. index_mails should probably be - shared.. */ - headers = cached_header_get_names(mail); - idx = find_wanted_headers(mail->trans->cache_view, headers); - if (idx >= 0) { - /* all headers found */ - if (idx != mail->data.header_save_idx) { - mail_cache_set_corrupted(mail->ibox->cache, - "Duplicated header names list (%d and %d)", - idx, mail->data.header_save_idx); - } - } else { - /* there's some new headers */ - idx = find_unused_header_idx(mail->trans->cache_view); - if (idx < 0) - return; - - if (!mail_cache_set_header_fields(mail->trans->cache_trans, - idx, headers)) - return; - } - - str = str_c(mail->data.header_data) + data->header_data_uncached_offset; - len = str_len(mail->data.header_data) - - data->header_data_uncached_offset; - - if (idx >= data->header_data_cached) { - mail_cache_add(mail->trans->cache_trans, data->seq, - mail_cache_header_fields[idx], str, len+1); - } - data->header_save = FALSE; + pool_unref(ctx->pool); } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-mail.c --- a/src/lib-storage/index/index-mail.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-mail.c Sun Jul 18 05:25:06 2004 +0300 @@ -13,6 +13,22 @@ #include "index-storage.h" #include "index-mail.h" +struct mail_cache_field cache_fields[MAIL_CACHE_FIELD_COUNT] = { + { "index.flags", 0, MAIL_CACHE_FIELD_BITMASK, + sizeof(uint32_t), 0, 0, 0 }, + { "date.sent", 0, MAIL_CACHE_FIELD_FIXED_SIZE, + sizeof(struct mail_sent_date), 0, 0, 0 }, + { 
"date.received", 0, MAIL_CACHE_FIELD_FIXED_SIZE, + sizeof(time_t), 0, 0, 0 }, + { "size.virtual", 0, MAIL_CACHE_FIELD_FIXED_SIZE, + sizeof(uoff_t), 0, 0, 0 }, + { "imap.body", 0, MAIL_CACHE_FIELD_STRING, 0, 0, 0, 0 }, + { "imap.bodystructure", 0, MAIL_CACHE_FIELD_STRING, 0, 0, 0, 0 }, + { "imap.envelope", 0, MAIL_CACHE_FIELD_STRING, 0, 0, 0, 0 }, + { "mime.parts", 0, MAIL_CACHE_FIELD_VARIABLE_SIZE, 0, 0, 0, 0 }, + { "mail.uid", 0, MAIL_CACHE_FIELD_STRING, 0, 0, 0, 0 } +}; + static void index_mail_parse_body(struct index_mail *mail, int need_parts); static struct message_part *get_cached_parts(struct index_mail *mail) @@ -25,8 +41,8 @@ part_buf = buffer_create_dynamic(pool_datastack_create(), 128, (size_t)-1); if (mail_cache_lookup_field(mail->trans->cache_view, part_buf, - mail->data.seq, - MAIL_CACHE_MESSAGEPART) <= 0) { + mail->data.seq, + cache_fields[MAIL_CACHE_MESSAGEPART].idx) <= 0) { t_pop(); return NULL; } @@ -56,13 +72,14 @@ } const char *index_mail_get_cached_string(struct index_mail *mail, - enum mail_cache_field field) + enum index_cache_field field) { string_t *str; str = str_new(mail->pool, 32); - if (mail_cache_lookup_string_field(mail->trans->cache_view, str, - mail->data.seq, field) <= 0) { + if (mail_cache_lookup_field(mail->trans->cache_view, str, + mail->data.seq, + cache_fields[field].idx) <= 0) { p_free(mail->pool, str); return NULL; } @@ -71,7 +88,7 @@ } static int index_mail_get_fixed_field(struct index_mail *mail, - enum mail_cache_field field, + enum index_cache_field field, void *data, size_t data_size) { buffer_t *buf; @@ -80,7 +97,8 @@ t_push(); buf = buffer_create_data(pool_datastack_create(), data, data_size); if (mail_cache_lookup_field(mail->trans->cache_view, buf, - mail->data.seq, field) <= 0) { + mail->data.seq, + cache_fields[field].idx) <= 0) { ret = FALSE; } else { i_assert(buffer_get_used_size(buf) == data_size); @@ -92,11 +110,12 @@ } uoff_t index_mail_get_cached_uoff_t(struct index_mail *mail, - enum mail_cache_field field) 
+ enum index_cache_field field) { uoff_t uoff; - if (!index_mail_get_fixed_field(mail, field, &uoff, sizeof(uoff))) + /* pass the enum as-is: index_mail_get_fixed_field() maps it to cache_fields[field].idx itself */ + if (!index_mail_get_fixed_field(mail, field, &uoff, sizeof(uoff))) uoff = (uoff_t)-1; return uoff; @@ -207,7 +226,8 @@ } data->sent_date.timezone = tz; mail_cache_add(mail->trans->cache_trans, mail->data.seq, - MAIL_CACHE_SENT_DATE, &data->sent_date, + cache_fields[MAIL_CACHE_SENT_DATE].idx, + &data->sent_date, sizeof(data->sent_date)); } } @@ -258,9 +278,9 @@ return data->size; } -static void parse_bodystructure_header(struct message_part *part, - struct message_header_line *hdr, - void *context) +static void parse_bodystructure_part_header(struct message_part *part, + struct message_header_line *hdr, + void *context) { pool_t pool = context; @@ -281,11 +301,12 @@ i_stream_seek(data->stream, data->hdr_size.physical_size); - if (data->bodystructure_header_parsed) { + if (data->save_bodystructure_body) { /* bodystructure header is parsed, we want the body's mime headers too */ + i_assert(!data->save_bodystructure_header); message_parser_parse_body(data->parser_ctx, - parse_bodystructure_header, + parse_bodystructure_part_header, NULL, mail->pool); } else { message_parser_parse_body(data->parser_ctx, NULL, NULL, NULL); @@ -307,7 +328,7 @@ } /* update cache_flags */ - cache_flags = + /*FIXME:cache_flags = mail_cache_get_record_flags(mail->trans->cache_view, mail->data.seq); if (mail->mail.has_nuls) @@ -317,16 +338,16 @@ (void)mail_cache_update_record_flags(mail->trans->cache_view, mail->data.seq, - cache_flags); + cache_flags);*/ } /* see if we want to cache the message part */ if (mail_cache_field_exists(mail->trans->cache_view, mail->data.seq, - MAIL_CACHE_MESSAGEPART) != 0) + cache_fields[MAIL_CACHE_MESSAGEPART].idx) != 0) return; decision = mail_cache_field_get_decision(mail->ibox->cache, - MAIL_CACHE_MESSAGEPART); + cache_fields[MAIL_CACHE_MESSAGEPART].idx); if (decision != (MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED)
&& (decision != MAIL_CACHE_DECISION_NO || need_parts || (mail->wanted_fields & MAIL_FETCH_MESSAGE_PARTS) != 0)) { @@ -337,7 +358,8 @@ buf_data = buffer_get_data(buffer, &buf_size); mail_cache_add(mail->trans->cache_trans, mail->data.seq, - MAIL_CACHE_MESSAGEPART, buf_data, buf_size); + cache_fields[MAIL_CACHE_MESSAGEPART].idx, + buf_data, buf_size); t_pop(); } } @@ -378,15 +400,17 @@ } static void index_mail_parse_bodystructure(struct index_mail *mail, - enum mail_cache_field field) + enum index_cache_field field) { struct index_mail_data *data = &mail->data; enum mail_cache_decision_type dec; string_t *str; int bodystructure_cached = FALSE; - if (!data->bodystructure_header_parsed) { - data->bodystructure_header_want = TRUE; + if (data->save_bodystructure_header || !data->save_bodystructure_body) { + /* we haven't parsed the header yet */ + data->save_bodystructure_header = TRUE; + data->save_bodystructure_body = TRUE; if (!index_mail_parse_headers(mail)) return; } @@ -394,18 +418,18 @@ if (data->parts != NULL) { i_assert(data->parts->next == NULL); message_parse_from_parts(data->parts->children, data->stream, - parse_bodystructure_header, + parse_bodystructure_part_header, mail->pool); } else { index_mail_parse_body(mail, FALSE); } dec = mail_cache_field_get_decision(mail->ibox->cache, - MAIL_CACHE_BODYSTRUCTURE); + cache_fields[MAIL_CACHE_BODYSTRUCTURE].idx); if (field == MAIL_CACHE_BODYSTRUCTURE || ((dec & ~MAIL_CACHE_DECISION_FORCED) != MAIL_CACHE_DECISION_NO && mail_cache_field_exists(mail->trans->cache_view, data->seq, - MAIL_CACHE_BODYSTRUCTURE)) == 0) { + cache_fields[MAIL_CACHE_BODYSTRUCTURE].idx) == 0)) { str = str_new(mail->pool, 128); imap_bodystructure_write(data->parts, str, TRUE); data->bodystructure = str_c(str); @@ -413,17 +437,18 @@ if (dec != (MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED)) { mail_cache_add(mail->trans->cache_trans, data->seq, - MAIL_CACHE_BODYSTRUCTURE, - str_c(str), str_len(str)+1); + 
cache_fields[MAIL_CACHE_BODYSTRUCTURE].idx, + str_c(str), str_len(str)+1); bodystructure_cached = TRUE; } } - dec = mail_cache_field_get_decision(mail->ibox->cache, MAIL_CACHE_BODY); + dec = mail_cache_field_get_decision(mail->ibox->cache, + cache_fields[MAIL_CACHE_BODY].idx); if (field == MAIL_CACHE_BODY || ((dec & ~MAIL_CACHE_DECISION_FORCED) != MAIL_CACHE_DECISION_NO && mail_cache_field_exists(mail->trans->cache_view, data->seq, - MAIL_CACHE_BODY)) == 0) { + cache_fields[MAIL_CACHE_BODY].idx) == 0)) { str = str_new(mail->pool, 128); imap_bodystructure_write(data->parts, str, FALSE); data->body = str_c(str); @@ -431,7 +456,7 @@ if (!bodystructure_cached && dec != (MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED)) { mail_cache_add(mail->trans->cache_trans, data->seq, - MAIL_CACHE_BODY, + cache_fields[MAIL_CACHE_BODY].idx, str_c(str), str_len(str)+1); } } @@ -455,16 +480,16 @@ depending on what we want cached */ str = str_new(mail->pool, 128); - if (mail_cache_lookup_string_field(mail->trans->cache_view, - str, mail->data.seq, - MAIL_CACHE_BODY) > 0) { + if (mail_cache_lookup_field(mail->trans->cache_view, str, + mail->data.seq, + cache_fields[MAIL_CACHE_BODY].idx) > 0) { data->body = str_c(str); return data->body; } - if (mail_cache_lookup_string_field(mail->trans->cache_view, - str, mail->data.seq, - MAIL_CACHE_BODYSTRUCTURE) > 0) { - data->bodystructure = str_c(str); + if (mail_cache_lookup_field(mail->trans->cache_view, str, + mail->data.seq, + cache_fields[MAIL_CACHE_BODYSTRUCTURE].idx) > 0) { + data->bodystructure = p_strdup(mail->pool, str_c(str)); str_truncate(str, 0); if (imap_body_parse_from_bodystructure( @@ -474,10 +499,10 @@ } /* broken, continue.. 
*/ - data->bodystructure = NULL; mail_cache_set_corrupted(mail->ibox->cache, "Corrupted BODYSTRUCTURE for mail %u", mail->mail.uid); + data->bodystructure = NULL; } p_free(mail->pool, str); @@ -488,9 +513,9 @@ return data->bodystructure; str = str_new(mail->pool, 128); - if (mail_cache_lookup_string_field(mail->trans->cache_view, - str, mail->data.seq, - MAIL_CACHE_BODYSTRUCTURE) > 0) { + if (mail_cache_lookup_field(mail->trans->cache_view, str, + mail->data.seq, + cache_fields[MAIL_CACHE_BODYSTRUCTURE].idx) > 0) { data->bodystructure = str_c(str); return data->bodystructure; } @@ -499,11 +524,8 @@ index_mail_parse_bodystructure(mail, MAIL_CACHE_BODYSTRUCTURE); return data->bodystructure; case MAIL_FETCH_IMAP_ENVELOPE: - if (data->envelope != NULL) - return data->envelope; - - data->save_envelope = TRUE; - (void)_mail->get_header(_mail, "Date"); + if (data->envelope == NULL) + index_mail_headers_get_envelope(mail); return data->envelope; case MAIL_FETCH_FROM_ENVELOPE: return NULL; @@ -523,8 +545,10 @@ void index_mail_init(struct index_transaction_context *t, struct index_mail *mail, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]) + struct mailbox_header_lookup_ctx *_wanted_headers) { + struct index_header_lookup_ctx *wanted_headers = + (struct index_header_lookup_ctx *)_wanted_headers; const struct mail_index_header *hdr; int ret; @@ -541,16 +565,14 @@ mail->trans = t; mail->wanted_fields = wanted_fields; mail->wanted_headers = wanted_headers; - - index_mail_headers_init(mail); } static void index_mail_close(struct index_mail *mail) { if (mail->data.stream != NULL) i_stream_unref(mail->data.stream); - - index_mail_headers_close(mail); + if (mail->data.filter_stream != NULL) + i_stream_unref(mail->data.filter_stream); } int index_mail_next(struct index_mail *mail, uint32_t seq) @@ -564,13 +586,12 @@ return -1; } - t_push(); + index_mail_close(mail); - index_mail_close(mail); memset(data, 0, sizeof(*data)); p_clear(mail->pool); - 
cache_flags = mail_cache_get_record_flags(mail->trans->cache_view, seq); + cache_flags = 0;//FIXME:mail_cache_get_record_flags(mail->trans->cache_view, seq); mail->mail.seq = seq; mail->mail.uid = rec->uid; @@ -583,6 +604,8 @@ data->size = (uoff_t)-1; data->received_date = data->sent_date.time = (time_t)-1; + t_push(); + /* if some wanted fields are cached, get them */ if (mail->wanted_fields & MAIL_FETCH_MESSAGE_PARTS) data->parts = get_cached_parts(mail); @@ -611,20 +634,20 @@ data->parts = get_cached_parts(mail); data->open_mail = TRUE; data->parse_header = data->parts == NULL; - data->bodystructure_header_want = TRUE; + data->save_bodystructure_header = TRUE; + data->save_bodystructure_body = TRUE; } else if ((mail->wanted_fields & MAIL_FETCH_IMAP_BODY) && data->body == NULL && data->bodystructure == NULL) { if (data->parts == NULL) data->parts = get_cached_parts(mail); data->open_mail = TRUE; data->parse_header = data->parts == NULL; - data->bodystructure_header_want = TRUE; + data->save_bodystructure_header = TRUE; + data->save_bodystructure_body = TRUE; } else if (mail->wanted_fields & (MAIL_FETCH_STREAM_HEADER | MAIL_FETCH_STREAM_BODY)) data->open_mail = TRUE; - index_mail_headers_init_next(mail); - if ((mail->wanted_fields & MAIL_FETCH_DATE) && data->sent_date.time == (time_t)-1) data->save_sent_date = TRUE; @@ -641,9 +664,14 @@ if (mail->ibox->mail_deinit != NULL) mail->ibox->mail_deinit(mail); - t_push(); index_mail_close(mail); - t_pop(); + + if (mail->header_data != NULL) + buffer_free(mail->header_data); + if (mail->header_lines != NULL) + buffer_free(mail->header_lines); + if (mail->header_match != NULL) + buffer_free(mail->header_match); pool_unref(mail->pool); memset(mail, 0, sizeof(*mail)); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-mail.h --- a/src/lib-storage/index/index-mail.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-mail.h Sun Jul 18 05:25:06 2004 +0300 @@ -5,6 +5,50 @@ #include "mail-cache.h" 
#include "mail-storage-private.h" +enum index_cache_field { + /* fixed size fields */ + MAIL_CACHE_INDEX_FLAGS = 0, + MAIL_CACHE_SENT_DATE, + MAIL_CACHE_RECEIVED_DATE, + MAIL_CACHE_VIRTUAL_FULL_SIZE, + + /* variable sized field */ + MAIL_CACHE_BODY, + MAIL_CACHE_BODYSTRUCTURE, + MAIL_CACHE_ENVELOPE, + MAIL_CACHE_MESSAGEPART, + MAIL_CACHE_UID_STRING, + + MAIL_CACHE_FIELD_COUNT +}; +extern struct mail_cache_field cache_fields[MAIL_CACHE_FIELD_COUNT]; + +enum mail_cache_record_flag { + /* If binary flags are set, it's not checked whether mail is + missing CRs. So this flag may be set as an optimization for + regular non-binary mails as well if it's known that it contains + valid CR+LF line breaks. */ + MAIL_INDEX_FLAG_BINARY_HEADER = 0x0001, + MAIL_INDEX_FLAG_BINARY_BODY = 0x0002, + + /* Mail header or body is known to contain NUL characters. */ + MAIL_INDEX_FLAG_HAS_NULS = 0x0004, + /* Mail header or body is known to not contain NUL characters. */ + MAIL_INDEX_FLAG_HAS_NO_NULS = 0x0008 +}; + +struct mail_sent_date { + time_t time; + int32_t timezone; +}; + +struct index_mail_line { + unsigned int field_idx; + uint32_t start_pos, end_pos; + uint32_t line_num; + unsigned int cache:1; +}; + struct message_header_line; struct index_mail_data { @@ -13,13 +57,8 @@ uoff_t size; struct mail_sent_date sent_date; - - buffer_t *headers; - string_t *header_data; - int header_data_cached, header_data_cached_contiguous; - size_t header_data_uncached_offset; - struct istream *header_stream; - int header_save_idx; + struct index_mail_line parse_line; + uint32_t parse_line_num; struct message_part *parts; const char *envelope, *body, *bodystructure, *uid_string; @@ -28,24 +67,20 @@ uint32_t seq; const struct mail_index_record *rec; - struct istream *stream; + struct istream *stream, *filter_stream; struct message_size hdr_size, body_size; struct message_parser_ctx *parser_ctx; int parsing_count; unsigned int parse_header:1; - unsigned int bodystructure_header_want:1; - unsigned int 
bodystructure_header_parse:1; - unsigned int bodystructure_header_parsed:1; unsigned int save_envelope:1; unsigned int save_sent_date:1; + unsigned int save_bodystructure_header:1; + unsigned int save_bodystructure_body:1; unsigned int hdr_size_set:1; unsigned int body_size_set:1; + unsigned int open_mail:1; unsigned int deleted:1; - unsigned int header_data_cached_partial:1; - unsigned int header_fully_parsed:1; - unsigned int header_save:1; - unsigned int open_mail:1; }; struct index_mail { @@ -55,37 +90,37 @@ pool_t pool; struct index_mailbox *ibox; struct index_transaction_context *trans; - unsigned int expunge_counter; - buffer_t *header_buf; uint32_t uid_validity; enum mail_fetch_field wanted_fields; - const char *const *wanted_headers; - int wanted_headers_idx; + struct index_header_lookup_ctx *wanted_headers; + + /* per-mail variables, here for performance reasons: */ + string_t *header_data; + buffer_t *header_lines; + buffer_t *header_match; + uint8_t header_match_value; }; void index_mail_init(struct index_transaction_context *t, struct index_mail *mail, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]); + struct mailbox_header_lookup_ctx *wanted_headers); int index_mail_next(struct index_mail *mail, uint32_t seq); void index_mail_deinit(struct index_mail *mail); void index_mail_parse_header_init(struct index_mail *mail, - const char *const headers[]); + struct mailbox_header_lookup_ctx *headers); int index_mail_parse_header(struct message_part *part, struct message_header_line *hdr, struct index_mail *mail); - int index_mail_parse_headers(struct index_mail *mail); - -void index_mail_headers_init(struct index_mail *mail); -void index_mail_headers_init_next(struct index_mail *mail); -void index_mail_headers_close(struct index_mail *mail); +void index_mail_headers_get_envelope(struct index_mail *mail); const char *index_mail_get_header(struct mail *_mail, const char *field); -struct istream *index_mail_get_headers(struct mail 
*_mail, - const char *const minimum_fields[]); +struct istream * +index_mail_get_headers(struct mail *_mail, + struct mailbox_header_lookup_ctx *headers); const struct mail_full_flags *index_mail_get_flags(struct mail *_mail); const struct message_part *index_mail_get_parts(struct mail *_mail); @@ -104,9 +139,9 @@ int index_mail_expunge(struct mail *mail); const char *index_mail_get_cached_string(struct index_mail *mail, - enum mail_cache_field field); + enum index_cache_field field); uoff_t index_mail_get_cached_uoff_t(struct index_mail *mail, - enum mail_cache_field field); + enum index_cache_field field); uoff_t index_mail_get_cached_virtual_size(struct index_mail *mail); time_t index_mail_get_cached_received_date(struct index_mail *mail); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-search.c --- a/src/lib-storage/index/index-search.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-search.c Sun Jul 18 05:25:06 2004 +0300 @@ -444,7 +444,7 @@ if (hdr->eoh) return; - index_mail_parse_header(part, hdr, &ctx->index_context->imail); + index_mail_parse_header(NULL, hdr, &ctx->index_context->imail); if (ctx->custom_header || strcasecmp(hdr->name, "Date") == 0) { ctx->hdr = hdr; @@ -482,6 +482,7 @@ struct index_search_context *ctx) { struct istream *input; + struct mailbox_header_lookup_ctx *headers_ctx; const char *const *headers; int have_headers, have_body; @@ -496,20 +497,33 @@ if (have_body) headers = NULL; - input = headers == NULL ? 
- ctx->mail->get_stream(ctx->mail, NULL, NULL) : - ctx->mail->get_headers(ctx->mail, headers); - if (input == NULL) - return FALSE; + if (headers == NULL) { + headers_ctx = NULL; + input = ctx->mail->get_stream(ctx->mail, NULL, NULL); + if (input == NULL) + return FALSE; + } else { + /* FIXME: do this once in init */ + headers_ctx = + mailbox_header_lookup_init(&ctx->ibox->box, + headers); + input = ctx->mail->get_headers(ctx->mail, headers_ctx); + if (input == NULL) { + mailbox_header_lookup_deinit(headers_ctx); + return FALSE; + } + } memset(&hdr_ctx, 0, sizeof(hdr_ctx)); hdr_ctx.index_context = ctx; hdr_ctx.custom_header = TRUE; hdr_ctx.args = args; - index_mail_parse_header_init(&ctx->imail, headers); + index_mail_parse_header_init(&ctx->imail, headers_ctx); message_parse_header(NULL, input, NULL, search_header, &hdr_ctx); + if (headers_ctx != NULL) + mailbox_header_lookup_deinit(headers_ctx); } else { struct message_size hdr_size; @@ -720,7 +734,7 @@ const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]) + struct mailbox_header_lookup_ctx *wanted_headers) { struct index_transaction_context *t = (struct index_transaction_context *)_t; diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-storage.c --- a/src/lib-storage/index/index-storage.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-storage.c Sun Jul 18 05:25:06 2004 +0300 @@ -11,6 +11,8 @@ #include #include +#define DEFAULT_NEVER_CACHE_FIELDS "imap.envelope" + /* How many seconds to keep index opened for reuse after it's been closed */ #define INDEX_CACHE_TIMEOUT 10 /* How many closed indexes to keep */ @@ -173,28 +175,9 @@ destroy_unrefed(TRUE); } -static void set_cache_fields(const char *fields, - enum mail_cache_decision_type dest[32], - enum mail_cache_decision_type dec) +static void set_cache_decisions(const char *fields, + enum mail_cache_decision_type dec) { - 
static enum mail_cache_field field_enums[] = { - MAIL_CACHE_SENT_DATE, - MAIL_CACHE_RECEIVED_DATE, - MAIL_CACHE_VIRTUAL_FULL_SIZE, - MAIL_CACHE_BODY, - MAIL_CACHE_BODYSTRUCTURE, - MAIL_CACHE_MESSAGEPART - }; - static const char *field_names[] = { - "sent_date", - "received_date", - "virtual_size", - "body", - "bodystructure", - "messagepart", - NULL - }; - const char *const *arr; int i; @@ -202,33 +185,34 @@ return; for (arr = t_strsplit_spaces(fields, " ,"); *arr != NULL; arr++) { - for (i = 0; field_names[i] != NULL; i++) { - if (strcasecmp(field_names[i], *arr) == 0) { - dest[field_enums[i]] = dec; + for (i = 0; i < MAIL_CACHE_FIELD_COUNT; i++) { + if (strcasecmp(cache_fields[i].name, *arr) == 0) { + cache_fields[i].decision = dec; break; } } - if (field_names[i] == NULL) { + if (i == MAIL_CACHE_FIELD_COUNT) { i_error("Invalid cache field name '%s', ignoring ", *arr); } } } -static const enum mail_cache_decision_type *get_default_cache_decisions(void) +static void index_cache_register_defaults(struct mail_cache *cache) { - static enum mail_cache_decision_type dec[32]; - static int dec_set = FALSE; + const char *never_env; + + never_env = getenv("MAIL_NEVER_CACHE_FIELDS"); + if (never_env == NULL) + never_env = DEFAULT_NEVER_CACHE_FIELDS; - if (dec_set) - return dec; + set_cache_decisions(getenv("MAIL_CACHE_FIELDS"), + MAIL_CACHE_DECISION_TEMP); + set_cache_decisions(never_env, MAIL_CACHE_DECISION_NO | + MAIL_CACHE_DECISION_FORCED); - memset(dec, 0, sizeof(dec)); - set_cache_fields(getenv("MAIL_CACHE_FIELDS"), dec, - MAIL_CACHE_DECISION_TEMP); - set_cache_fields(getenv("MAIL_NEVER_CACHE_FIELDS"), dec, - MAIL_CACHE_DECISION_NO | MAIL_CACHE_DECISION_FORCED); - return dec; + mail_cache_register_fields(cache, cache_fields, + MAIL_CACHE_FIELD_COUNT); } void index_storage_lock_notify(struct index_mailbox *ibox, @@ -327,8 +311,7 @@ break; ibox->cache = mail_index_get_cache(index); - mail_cache_set_defaults(ibox->cache, - get_default_cache_decisions()); + 
index_cache_register_defaults(ibox->cache); ibox->view = mail_index_view_open(index); return ibox; } while (0); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/index-storage.h --- a/src/lib-storage/index/index-storage.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/index-storage.h Sun Jul 18 05:25:06 2004 +0300 @@ -181,6 +181,10 @@ int index_storage_get_uids(struct mailbox *box, uint32_t uid1, uint32_t uid2, uint32_t *seq1_r, uint32_t *seq2_r); +struct mailbox_header_lookup_ctx * +index_header_lookup_init(struct mailbox *box, const char *const headers[]); +void index_header_lookup_deinit(struct mailbox_header_lookup_ctx *ctx); + int index_storage_search_get_sorting(struct mailbox *box, enum mail_sort_type *sort_program); struct mail_search_context * @@ -188,7 +192,7 @@ const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]); + struct mailbox_header_lookup_ctx *wanted_headers); int index_storage_search_deinit(struct mail_search_context *ctx); struct mail *index_storage_search_next(struct mail_search_context *ctx); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/maildir/maildir-storage.c --- a/src/lib-storage/index/maildir/maildir-storage.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/maildir/maildir-storage.c Sun Jul 18 05:25:06 2004 +0300 @@ -861,7 +861,9 @@ maildir_transaction_rollback, index_storage_fetch, index_storage_get_uids, - index_storage_search_get_sorting, + index_header_lookup_init, + index_header_lookup_deinit, + index_storage_search_get_sorting, index_storage_search_init, index_storage_search_deinit, index_storage_search_next, diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/mbox/mbox-mail.c --- a/src/lib-storage/index/mbox/mbox-mail.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-mail.c Sun Jul 18 05:25:06 2004 +0300 @@ -130,9 +130,10 @@ offset, (uoff_t)-1); 
data->stream = i_stream_create_header_filter(default_pool, - raw_stream, + raw_stream, TRUE, mbox_hide_headers, - mbox_hide_headers_count); + mbox_hide_headers_count, + NULL, NULL); i_stream_unref(raw_stream); } diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/index/mbox/mbox-storage.c --- a/src/lib-storage/index/mbox/mbox-storage.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/index/mbox/mbox-storage.c Sun Jul 18 05:25:06 2004 +0300 @@ -889,6 +889,8 @@ mbox_transaction_rollback, index_storage_fetch, index_storage_get_uids, + index_header_lookup_init, + index_header_lookup_deinit, index_storage_search_get_sorting, index_storage_search_init, index_storage_search_deinit, diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/mail-storage-private.h --- a/src/lib-storage/mail-storage-private.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/mail-storage-private.h Sun Jul 18 05:25:06 2004 +0300 @@ -89,15 +89,19 @@ int (*get_uids)(struct mailbox *box, uint32_t uid1, uint32_t uid2, uint32_t *seq1_r, uint32_t *seq2_r); + struct mailbox_header_lookup_ctx * + (*header_lookup_init)(struct mailbox *box, + const char *const headers[]); + void (*header_lookup_deinit)(struct mailbox_header_lookup_ctx *ctx); + int (*search_get_sorting)(struct mailbox *box, enum mail_sort_type *sort_program); struct mail_search_context * - (*search_init)(struct mailbox_transaction_context *t, - const char *charset, - struct mail_search_arg *args, - const enum mail_sort_type *sort_program, - enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]); + (*search_init)(struct mailbox_transaction_context *t, + const char *charset, struct mail_search_arg *args, + const enum mail_sort_type *sort_program, + enum mail_fetch_field wanted_fields, + struct mailbox_header_lookup_ctx *wanted_headers); int (*search_deinit)(struct mail_search_context *ctx); struct mail *(*search_next)(struct mail_search_context *ctx); @@ -128,6 +132,10 @@ struct mailbox *box; }; +struct 
mailbox_header_lookup_ctx { + struct mailbox *box; +}; + /* Set error message in storage. Critical errors are logged with i_error(), but user sees only "internal error" message. */ void mail_storage_clear_error(struct mail_storage *storage); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/mail-storage.c --- a/src/lib-storage/mail-storage.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/mail-storage.c Sun Jul 18 05:25:06 2004 +0300 @@ -370,6 +370,17 @@ return box->get_uids(box, uid1, uid2, seq1_r, seq2_r); } +struct mailbox_header_lookup_ctx * +mailbox_header_lookup_init(struct mailbox *box, const char *const headers[]) +{ + return box->header_lookup_init(box, headers); +} + +void mailbox_header_lookup_deinit(struct mailbox_header_lookup_ctx *ctx) +{ + ctx->box->header_lookup_deinit(ctx); +} + int mailbox_search_get_sorting(struct mailbox *box, enum mail_sort_type *sort_program) { @@ -381,7 +392,7 @@ const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]) + struct mailbox_header_lookup_ctx *wanted_headers) { return t->box->search_init(t, charset, args, sort_program, wanted_fields, wanted_headers); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/mail-storage.h --- a/src/lib-storage/mail-storage.h Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/mail-storage.h Sun Jul 18 05:25:06 2004 +0300 @@ -289,6 +289,11 @@ int mailbox_get_uids(struct mailbox *box, uint32_t uid1, uint32_t uid2, uint32_t *seq1_r, uint32_t *seq2_r); +/* Initialize header lookup for given headers. */ +struct mailbox_header_lookup_ctx * +mailbox_header_lookup_init(struct mailbox *box, const char *const headers[]); +void mailbox_header_lookup_deinit(struct mailbox_header_lookup_ctx *ctx); + /* Modify sort_program to specify a sort program acceptable for search_init(). If mailbox supports no sorting, it's simply set to {MAIL_SORT_END}. 
*/ @@ -308,7 +313,7 @@ const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]); + struct mailbox_header_lookup_ctx *wanted_headers); /* Deinitialize search request. */ int mailbox_search_deinit(struct mail_search_context *ctx); /* Search the next message. Returned mail object can be used until @@ -365,10 +370,10 @@ /* Get value for single header field */ const char *(*get_header)(struct mail *mail, const char *field); - /* Returns partial headers which contain _at least_ the given fields, - but it may contain others as well. */ - struct istream *(*get_headers)(struct mail *mail, - const char *const minimum_fields[]); + /* Returns stream containing specified headers. */ + struct istream * + (*get_headers)(struct mail *mail, + struct mailbox_header_lookup_ctx *headers); /* Returns input stream pointing to beginning of message header. hdr_size and body_size are updated unless they're NULL. */ @@ -376,7 +381,7 @@ struct message_size *hdr_size, struct message_size *body_size); - /* Get the any of the "special" fields. */ + /* Get any of the "special" fields. 
*/ const char *(*get_special)(struct mail *mail, enum mail_fetch_field field); diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/proxy-mail.c --- a/src/lib-storage/proxy-mail.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/proxy-mail.c Sun Jul 18 05:25:06 2004 +0300 @@ -45,6 +45,14 @@ return p->mail->get_header(p->mail, field); } +static struct istream * +_get_headers(struct mail *mail, struct mailbox_header_lookup_ctx *headers) +{ + struct proxy_mail *p = (struct proxy_mail *) mail; + + return p->mail->get_headers(p->mail, headers); +} + static struct istream *_get_stream(struct mail *mail, struct message_size *hdr_size, struct message_size *body_size) @@ -91,6 +99,7 @@ pm->get_date = _get_date; pm->get_size = _get_size; pm->get_header = _get_header; + pm->get_headers = _get_headers; pm->get_stream = _get_stream; pm->get_special = _get_special; pm->update_flags = _update_flags; diff -r 0a8c9bfc05a1 -r 7d02e2a7672d src/lib-storage/proxy-mailbox.c --- a/src/lib-storage/proxy-mailbox.c Sun Jul 18 04:44:59 2004 +0300 +++ b/src/lib-storage/proxy-mailbox.c Sun Jul 18 05:25:06 2004 +0300 @@ -66,6 +66,14 @@ return p->box->get_uids(p->box, uid1, uid2, seq1_r, seq2_r); } +static struct mailbox_header_lookup_ctx * +_header_lookup_init(struct mailbox *box, const char *const headers[]) +{ + struct proxy_mailbox *p = (struct proxy_mailbox *) box; + + return p->box->header_lookup_init(p->box, headers); +} + static int _search_get_sorting(struct mailbox *box, enum mail_sort_type *sort_program) { @@ -79,7 +87,7 @@ const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program, enum mail_fetch_field wanted_fields, - const char *const wanted_headers[]) + struct mailbox_header_lookup_ctx *wanted_headers) { struct proxy_mailbox_transaction_context *pt = (struct proxy_mailbox_transaction_context *)t; @@ -157,6 +165,7 @@ pb->notify_changes = _notify_changes; pb->fetch = _fetch; pb->get_uids = _get_uids; + pb->header_lookup_init = 
_header_lookup_init; pb->search_get_sorting = _search_get_sorting; pb->search_init = _search_init;