# HG changeset patch # User Timo Sirainen # Date 1212972964 -10800 # Node ID 929198b1f31307470cafd3caa1f43b3d852ec2c6 # Parent 882888286bf5611b40e6c1fa605367922a23c04a# Parent bc346cfdf954baec92d4c8006536286941bc97fe Merged initial mail thread indexing implementation. diff -r 882888286bf5 -r 929198b1f313 TODO diff -r 882888286bf5 -r 929198b1f313 configure.in --- a/configure.in Mon Jun 09 03:23:34 2008 +0300 +++ b/configure.in Mon Jun 09 03:56:04 2008 +0300 @@ -1,5 +1,5 @@ AC_PREREQ([2.59]) -AC_INIT([dovecot],[1.1.rc3],[dovecot@dovecot.org]) +AC_INIT([dovecot],[1.2.UNSTABLE],[dovecot@dovecot.org]) AC_CONFIG_SRCDIR([src]) AM_INIT_AUTOMAKE diff -r 882888286bf5 -r 929198b1f313 dovecot-example.conf diff -r 882888286bf5 -r 929198b1f313 src/imap/Makefile.am --- a/src/imap/Makefile.am Mon Jun 09 03:23:34 2008 +0300 +++ b/src/imap/Makefile.am Mon Jun 09 03:56:04 2008 +0300 @@ -82,6 +82,8 @@ imap-status.c \ imap-sync.c \ imap-thread.c \ + imap-thread-links.c \ + imap-thread-finish.c \ mail-storage-callbacks.c \ main.c diff -r 882888286bf5 -r 929198b1f313 src/imap/cmd-thread.c --- a/src/imap/cmd-thread.c Mon Jun 09 03:23:34 2008 +0300 +++ b/src/imap/cmd-thread.c Mon Jun 09 03:56:04 2008 +0300 @@ -38,6 +38,8 @@ str = IMAP_ARG_STR(args); if (strcasecmp(str, "REFERENCES") == 0) threading = MAIL_THREAD_REFERENCES; + else if (strcasecmp(str, "X-REFERENCES2") == 0) + threading = MAIL_THREAD_REFERENCES2; else if (strcasecmp(str, "ORDEREDSUBJECT") == 0) { client_send_command_error(cmd, "ORDEREDSUBJECT threading is currently not supported."); diff -r 882888286bf5 -r 929198b1f313 src/imap/imap-thread-finish.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/imap/imap-thread-finish.c Mon Jun 09 03:56:04 2008 +0300 @@ -0,0 +1,678 @@ +/* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */ + +#include "common.h" +#include "array.h" +#include "str.h" +#include "ostream.h" +#include "imap-base-subject.h" +#include "imap-thread-private.h" + +#include + +#define STR_FLUSH_LENGTH 512 + +struct mail_thread_shadow_node { + uint32_t first_child_idx, next_sibling_idx; +}; + +struct mail_thread_child_node { + uint32_t idx; + uint32_t uid; + time_t sort_date; +}; +ARRAY_DEFINE_TYPE(mail_thread_child_node, struct mail_thread_child_node); + +struct mail_thread_root_node { + /* node.idx usually points to indexes from mail hash. However + REFERENCES step (5) may add temporary dummy roots. They use larger + index numbers than exist in the hash. */ + struct mail_thread_child_node node; + + /* Used temporarily by (5)(B) base subject gathering. + root_idx1 is node's index in roots[] array + 1. + parent_root_idx points to root_idx1, or 0 for root. */ + unsigned int root_idx1; + uint32_t parent_root_idx1; + + /* subject contained a Re: or Fwd: */ + unsigned int reply_or_forward:1; + /* a dummy node */ + unsigned int dummy:1; + /* ignore this node - it's a dummy without children */ + unsigned int ignore:1; +}; + +struct thread_finish_context { + struct mailbox *box; + struct ostream *output; + + struct mail *tmp_mail; + struct mail_hash_transaction *hash_trans; + + ARRAY_DEFINE(roots, struct mail_thread_root_node); + ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node); + unsigned int next_new_root_idx; + + unsigned int id_is_uid:1; + unsigned int use_sent_date:1; + unsigned int flushed:1; +}; + +struct subject_gather_context { + struct thread_finish_context *ctx; + + pool_t subject_pool; + struct hash_table *subject_hash; +}; + +static void +add_base_subject(struct subject_gather_context *ctx, const char *subject, + struct mail_thread_root_node *node) +{ + struct mail_thread_root_node *hash_node; + char *hash_subject; + void *key, *value; + bool is_reply_or_forward; + + subject = imap_get_base_subject_cased(pool_datastack_create(), subject, + &is_reply_or_forward); + /* (ii) If the thread subject is empty, skip this message. */ + if (*subject == '\0') + return; + + /* (iii) Look up the message associated with the thread + subject in the subject table. */ + if (!hash_lookup_full(ctx->subject_hash, subject, &key, &value)) { + /* (iv) If there is no message in the subject table with the + thread subject, add the current message and the thread + subject to the subject table. */ + hash_subject = p_strdup(ctx->subject_pool, subject); + hash_insert(ctx->subject_hash, hash_subject, node); + } else { + hash_subject = key; + hash_node = value; + + /* Otherwise, if the message in the subject table is not a + dummy, AND either of the following criteria are true: + + The current message is a dummy, OR + + The message in the subject table is a reply or forward + and the current message is not. + + then replace the message in the subject table with the + current message. */ + if (!hash_node->dummy && + (node->dummy || + (hash_node->reply_or_forward && !is_reply_or_forward))) { + hash_node->parent_root_idx1 = node->root_idx1; + hash_update(ctx->subject_hash, hash_subject, node); + } else { + node->parent_root_idx1 = hash_node->root_idx1; + } + } + + node->reply_or_forward = is_reply_or_forward; +} + +static int mail_thread_child_node_cmp(const void *p1, const void *p2) +{ + const struct mail_thread_child_node *c1 = p1, *c2 = p2; + + if (c1->sort_date < c2->sort_date) + return -1; + if (c1->sort_date > c2->sort_date) + return 1; + + if (c1->uid < c2->uid) + return -1; + if (c1->uid > c2->uid) + return 1; + return 0; +} + +static int +thread_child_node_fill(struct thread_finish_context *ctx, + struct mail_thread_child_node *child) +{ + const struct mail_thread_node *node; + int tz; + + node = mail_hash_lookup_idx(ctx->hash_trans, child->idx); + i_assert(node->uid != 0 && node->exists); + child->uid = node->uid; + + if (!mail_set_uid(ctx->tmp_mail, child->uid)) { + /* the UID should have existed. we would have rebuild + the thread tree otherwise. */ + mail_hash_transaction_set_corrupted(ctx->hash_trans, + t_strdup_printf("Found expunged UID %u", child->uid)); + return -1; + } + + /* get sent date if we want to use it and if it's valid */ + if (!ctx->use_sent_date) + child->sort_date = 0; + else if (mail_get_date(ctx->tmp_mail, &child->sort_date, &tz) < 0) + child->sort_date = 0; + + if (child->sort_date == 0) { + /* fallback to received date */ + (void)mail_get_received_date(ctx->tmp_mail, &child->sort_date); + } + return 0; +} + +static int +thread_sort_children(struct thread_finish_context *ctx, uint32_t parent_idx, + ARRAY_TYPE(mail_thread_child_node) *sorted_children) +{ + const struct mail_thread_shadow_node *shadows; + const struct mail_thread_node *node; + struct mail_thread_child_node child, *children; + unsigned int count; + + memset(&child, 0, sizeof(child)); + array_clear(sorted_children); + + /* add all child indexes to the array */ + shadows = array_get(&ctx->shadow_nodes, &count); + child.idx = shadows[parent_idx].first_child_idx; + i_assert(child.idx != 0); + if (shadows[child.idx].next_sibling_idx == 0) { + /* only child - don't bother setting sort date */ + node = mail_hash_lookup_idx(ctx->hash_trans, child.idx); + i_assert(node->uid != 0 && node->exists); + child.uid = node->uid; + + array_append(sorted_children, &child, 1); + return 0; + } + while (child.idx != 0) { + if (thread_child_node_fill(ctx, &child) < 0) + return -1; + + array_append(sorted_children, &child, 1); + child.idx = shadows[child.idx].next_sibling_idx; + } + + /* sort the children */ + children = array_get_modifiable(sorted_children, &count); + qsort(children, count, sizeof(*children), mail_thread_child_node_cmp); + return 0; +} + +static int gather_base_subjects(struct thread_finish_context *ctx) +{ + struct subject_gather_context gather_ctx; + struct mail_thread_root_node *roots; + const struct mail_thread_node *node; + const char *subject; + unsigned int i, count; + ARRAY_TYPE(mail_thread_child_node) sorted_children; + const struct mail_thread_child_node *children; + uint32_t idx; + int ret = 0; + + memset(&gather_ctx, 0, sizeof(gather_ctx)); + gather_ctx.ctx = ctx; + + roots = array_get_modifiable(&ctx->roots, &count); + gather_ctx.subject_pool = + pool_alloconly_create(MEMPOOL_GROWING"base subjects", + nearest_power(count * 20)); + gather_ctx.subject_hash = + hash_create(default_pool, gather_ctx.subject_pool, count * 2, + str_hash, (hash_cmp_callback_t *)strcmp); + + i_array_init(&sorted_children, 64); + for (i = 0; i < count; i++) { + roots[i].root_idx1 = i + 1; + if (!roots[i].dummy) + idx = roots[i].node.idx; + else if (!roots[i].ignore) { + /* find the oldest child */ + if (thread_sort_children(ctx, roots[i].node.idx, + &sorted_children) < 0) { + ret = -1; + break; + } + children = array_idx(&sorted_children, 0); + idx = children[0].idx; + } else { + /* dummy without children */ + continue; + } + + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + i_assert(node->uid != 0 && node->exists); + + if (!mail_set_uid(ctx->tmp_mail, node->uid)) { + /* the UID should have existed. we would have rebuild + the thread tree otherwise. */ + mail_hash_transaction_set_corrupted( + ctx->hash_trans, "Found expunged UID"); + ret = -1; + break; + } + if (mail_get_first_header(ctx->tmp_mail, HDR_SUBJECT, + &subject) > 0) T_BEGIN { + add_base_subject(&gather_ctx, subject, &roots[i]); + } T_END; + } + i_assert(roots[count-1].parent_root_idx1 <= count); + array_free(&sorted_children); + hash_destroy(&gather_ctx.subject_hash); + pool_unref(&gather_ctx.subject_pool); + + return ret; +} + +static void thread_add_shadow_child(struct thread_finish_context *ctx, + uint32_t parent_idx, uint32_t child_idx) +{ + struct mail_thread_shadow_node *parent_shadow, *child_shadow; + + parent_shadow = array_idx_modifiable(&ctx->shadow_nodes, parent_idx); + child_shadow = array_idx_modifiable(&ctx->shadow_nodes, child_idx); + + child_shadow->next_sibling_idx = parent_shadow->first_child_idx; + parent_shadow->first_child_idx = child_idx; +} + +static void mail_thread_root_thread_merge(struct thread_finish_context *ctx, + struct mail_thread_root_node *cur) +{ + struct mail_thread_root_node *roots, *root, new_root; + struct mail_thread_shadow_node *shadows; + unsigned int count; + uint32_t idx, next_idx; + + i_assert(cur->parent_root_idx1 != 0); + + /* The highest parent is the same as the current message in the + subject table. */ + roots = array_get_modifiable(&ctx->roots, &count); + root = cur; + do { + i_assert(root->parent_root_idx1 <= count); + root = &roots[root->parent_root_idx1 - 1]; + } while (root->parent_root_idx1 != 0); + i_assert(!root->ignore); + + shadows = array_idx_modifiable(&ctx->shadow_nodes, 0); + if (cur->dummy) { + /* If both messages are dummies, append the current + message's children to the children of the message in + the subject table (the children of both messages + become siblings), and then delete the current message. */ + i_assert(root->dummy); + + idx = shadows[cur->node.idx].first_child_idx; + while (idx != 0) { + next_idx = shadows[idx].next_sibling_idx; + thread_add_shadow_child(ctx, root->node.idx, idx); + idx = next_idx; + } + + shadows[cur->node.idx].first_child_idx = 0; + cur->ignore = TRUE; + } else if (root->dummy || (cur->reply_or_forward && + !root->reply_or_forward)) { + /* a) If the message in the subject table is a dummy and the + current message is not, make the current message a + child of the message in the subject table (a sibling + of its children). + + b) If the current message is a reply or forward and the + message in the subject table is not, make the current + message a child of the message in the subject table (a + sibling of its children). */ + thread_add_shadow_child(ctx, root->node.idx, cur->node.idx); + cur->ignore = TRUE; + } else { + /* Otherwise, create a new dummy message and make both + the current message and the message in the subject + table children of the dummy. Then replace the message + in the subject table with the dummy message. */ + memset(&new_root, 0, sizeof(new_root)); + new_root.root_idx1 = array_count(&ctx->roots) + 1; + new_root.node.idx = ctx->next_new_root_idx++; + new_root.dummy = TRUE; + + thread_add_shadow_child(ctx, new_root.node.idx, root->node.idx); + thread_add_shadow_child(ctx, new_root.node.idx, cur->node.idx); + + root->parent_root_idx1 = new_root.root_idx1; + root->ignore = TRUE; + cur->ignore = TRUE; + + /* append last, since it breaks root and cur pointers */ + array_append(&ctx->roots, &new_root, 1); + + /* make sure all shadow indexes are accessible directly */ + (void)array_idx_modifiable(&ctx->shadow_nodes, + new_root.node.idx); + } +} + +static bool merge_subject_threads(struct thread_finish_context *ctx) +{ + struct mail_thread_root_node *roots; + unsigned int i, count; + bool changed = FALSE; + + roots = array_get_modifiable(&ctx->roots, &count); + for (i = 0; i < count; i++) { + if (roots[i].parent_root_idx1 != 0 && !roots[i].ignore) { + mail_thread_root_thread_merge(ctx, &roots[i]); + /* more roots may have been added */ + roots = array_idx_modifiable(&ctx->roots, 0); + changed = TRUE; + } + } + + return changed; +} + +static int sort_root_nodes(struct thread_finish_context *ctx) +{ + ARRAY_TYPE(mail_thread_child_node) sorted_children; + const struct mail_thread_child_node *children; + const struct mail_thread_shadow_node *shadows; + struct mail_thread_root_node *roots; + unsigned int i, count, child_count; + int ret = 0; + + i_array_init(&sorted_children, 64); + shadows = array_idx(&ctx->shadow_nodes, 0); + roots = array_get_modifiable(&ctx->roots, &count); + for (i = 0; i < count; i++) { + if (roots[i].ignore) + continue; + if (roots[i].dummy) { + /* sort by the first child */ + if (shadows[roots[i].node.idx].first_child_idx == 0) { + /* childless dummy node */ + roots[i].ignore = TRUE; + continue; + } + if (thread_sort_children(ctx, roots[i].node.idx, + &sorted_children) < 0) { + ret = -1; + break; + } + children = array_get(&sorted_children, &child_count); + if (child_count == 1) { + /* only one child - deferred step (3). + promote the child to the root. */ + roots[i].node = children[0]; + if (thread_child_node_fill(ctx, + &roots[i].node) < 0) + return -1; + roots[i].dummy = FALSE; + } else { + roots[i].node.uid = children[0].uid; + roots[i].node.sort_date = children[0].sort_date; + } + } else { + if (thread_child_node_fill(ctx, &roots[i].node) < 0) { + ret = -1; + break; + } + } + } + array_free(&sorted_children); + if (ret < 0) + return -1; + + qsort(roots, count, sizeof(*roots), mail_thread_child_node_cmp); + return 0; +} + +static int +str_add_id(struct thread_finish_context *ctx, string_t *str, uint32_t uid) +{ + i_assert(uid != 0); + + if (!ctx->id_is_uid) { + mailbox_get_seq_range(ctx->box, uid, uid, &uid, &uid); + if (uid == 0) { + mail_hash_transaction_set_corrupted(ctx->hash_trans, + t_strdup_printf("Found expunged UID %u", uid)); + return -1; + } + } + str_printfa(str, "%u", uid); + + if (str_len(str) >= STR_FLUSH_LENGTH) { + (void)o_stream_send(ctx->output, str_data(str), str_len(str)); + str_truncate(str, 0); + ctx->flushed = TRUE; + } + return 0; +} + +static int send_nodes(struct thread_finish_context *ctx, string_t *str, + uint32_t parent_idx) +{ + ARRAY_TYPE(mail_thread_child_node) sorted_children; + const struct mail_thread_child_node *children; + const struct mail_thread_shadow_node *shadows; + unsigned int i, child_count; + uint32_t idx; + int ret; + + t_array_init(&sorted_children, 8); + if (thread_sort_children(ctx, parent_idx, &sorted_children) < 0) + return -1; + + shadows = array_idx(&ctx->shadow_nodes, 0); + children = array_get(&sorted_children, &child_count); + if (child_count == 1) { + /* only one child - special case to avoid extra paranthesis */ + if (str_add_id(ctx, str, children[0].uid) < 0) + return -1; + idx = children[0].idx; + if (shadows[idx].first_child_idx != 0) { + str_append_c(str, ' '); + T_BEGIN { + ret = send_nodes(ctx, str, idx); + } T_END; + if (ret < 0) + return -1; + } + return 0; + } + + for (i = 0; i < child_count; i++) { + idx = children[i].idx; + + if (shadows[idx].first_child_idx == 0) { + /* no children */ + str_append_c(str, '('); + if (str_add_id(ctx, str, children[i].uid) < 0) + return -1; + str_append_c(str, ')'); + } else { + /* node with children */ + str_append_c(str, '('); + if (str_add_id(ctx, str, children[i].uid) < 0) + return -1; + str_append_c(str, ' '); + T_BEGIN { + ret = send_nodes(ctx, str, idx); + } T_END; + if (ret < 0) + return -1; + str_append_c(str, ')'); + } + } + return 0; +} + +static int send_root(struct thread_finish_context *ctx, string_t *str, + const struct mail_thread_root_node *root) +{ + const struct mail_thread_shadow_node *shadow; + const struct mail_thread_node *node; + int ret; + + str_append_c(str, '('); + if (!root->dummy) { + node = mail_hash_lookup_idx(ctx->hash_trans, root->node.idx); + i_assert(node->uid != 0 && node->exists); + if (str_add_id(ctx, str, node->uid) < 0) + return -1; + } + + shadow = array_idx(&ctx->shadow_nodes, root->node.idx); + if (shadow->first_child_idx != 0) { + if (!root->dummy) + str_append_c(str, ' '); + + T_BEGIN { + ret = send_nodes(ctx, str, root->node.idx); + } T_END; + if (ret < 0) + return -1; + } + + str_append_c(str, ')'); + return 0; +} + +static int send_roots(struct thread_finish_context *ctx) +{ + const struct mail_thread_root_node *roots; + unsigned int i, count; + string_t *str; + int ret = 0; + + str = str_new(default_pool, STR_FLUSH_LENGTH + 32); + str_append(str, "* THREAD "); + + roots = array_get(&ctx->roots, &count); + for (i = 0; i < count; i++) { + if (!roots[i].ignore) { + if (send_root(ctx, str, &roots[i]) < 0) { + ret = -1; + break; + } + } + } + + if (ret == 0) { + str_append(str, "\r\n"); + (void)o_stream_send(ctx->output, str_data(str), str_len(str)); + } else if (ctx->flushed) { + o_stream_close(ctx->output); + } + str_free(&str); + return ret; +} + +int mail_thread_finish(struct mail *tmp_mail, + struct mail_hash_transaction *hash_trans, + enum mail_thread_type thread_type, + struct ostream *output, bool id_is_uid) +{ + struct thread_finish_context ctx; + const struct mail_hash_header *hdr; + struct mail_thread_node *node, *parent; + struct mail_thread_root_node root; + ARRAY_TYPE(uint32_t) free_indexes; + const uint32_t *indexes; + uint32_t idx, parent_idx; + unsigned int i, count; + int ret; + + memset(&ctx, 0, sizeof(ctx)); + ctx.box = tmp_mail->box; + ctx.output = output; + ctx.tmp_mail = tmp_mail; + ctx.hash_trans = hash_trans; + ctx.id_is_uid = id_is_uid; + ctx.use_sent_date = thread_type == MAIL_THREAD_REFERENCES; + + hdr = mail_hash_get_header(ctx.hash_trans); + if (hdr->record_count == 0) + return 0; + ctx.next_new_root_idx = hdr->record_count + 1; + + /* (2) save root nodes and (3) remove dummy messages */ + memset(&root, 0, sizeof(root)); + i_array_init(&free_indexes, 128); + i_array_init(&ctx.roots, I_MIN(128, hdr->record_count)); + i_array_init(&ctx.shadow_nodes, hdr->record_count); + for (idx = 1; idx < hdr->record_count; idx++) { + node = mail_hash_lookup_idx(ctx.hash_trans, idx); + if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) + continue; + + if (thread_node_is_root(node)) { + /* node is a duplicate root, free it later */ + array_append(&free_indexes, &idx, 1); + continue; + } + parent = node->parent_idx == 0 ? NULL : + mail_hash_lookup_idx(ctx.hash_trans, node->parent_idx); + if (thread_node_is_root(parent)) { + if (parent != NULL) { + /* parent is a duplicate root. replace it with + the real root. */ + node->parent_idx = 0; + mail_hash_update(ctx.hash_trans, idx); + } + root.node.idx = idx; + root.dummy = !node->exists; + array_append(&ctx.roots, &root, 1); + } else if (node->exists) { + /* Find the node's first non-dummy parent and add the + node as its child. If there are no non-dummy + parents, add it as the highest dummy's child. */ + parent_idx = node->parent_idx; + while (!parent->exists && parent->parent_idx != 0) { + parent_idx = parent->parent_idx; + parent = mail_hash_lookup_idx(ctx.hash_trans, + parent_idx); + } + thread_add_shadow_child(&ctx, parent_idx, idx); + } + } + /* make sure all shadow indexes are accessible directly */ + (void)array_idx_modifiable(&ctx.shadow_nodes, hdr->record_count); + + indexes = array_get(&free_indexes, &count); + for (i = 0; i < count; i++) { + node = mail_hash_lookup_idx(ctx.hash_trans, indexes[i]); + mail_hash_remove(ctx.hash_trans, indexes[i], + node->msgid_crc32); + } + array_free(&free_indexes); + + /* (4) */ + if (sort_root_nodes(&ctx) < 0) + return -1; + if (thread_type == MAIL_THREAD_REFERENCES) { + /* (5) Gather together messages under the root that have + the same base subject text. */ + if (gather_base_subjects(&ctx) < 0) + return -1; + + /* (5.C) Merge threads with the same thread subject. */ + if (merge_subject_threads(&ctx)) { + /* root ordering may have changed, sort them again. */ + if (sort_root_nodes(&ctx) < 0) + return -1; + } + } + + /* (6) Sort children and send replies */ + T_BEGIN { + ret = send_roots(&ctx); + } T_END; + array_free(&ctx.roots); + array_free(&ctx.shadow_nodes); + return ret; +} diff -r 882888286bf5 -r 929198b1f313 src/imap/imap-thread-links.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/imap/imap-thread-links.c Mon Jun 09 03:56:04 2008 +0300 @@ -0,0 +1,411 @@ +/* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */ + +#include "common.h" +#include "array.h" +#include "message-id.h" +#include "mail-storage.h" +#include "imap-thread-private.h" + +static struct mail_thread_node * +thread_msgid_get(struct thread_context *ctx, uint32_t ref_uid, + const char *msgid, uint32_t *idx_r) +{ + struct mail_thread_node *node, new_node; + struct msgid_search_key key; + const char **msgidp; + uint32_t idx; + + key.msgid = msgid; + key.msgid_crc32 = crc32_str_nonzero(msgid); + + node = mail_hash_lookup(ctx->hash_trans, &key, &idx); + if (node == NULL) { + /* not found, create */ + memset(&new_node, 0, sizeof(new_node)); + new_node.msgid_crc32 = key.msgid_crc32; + new_node.uid = ref_uid; + + mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx); + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + } else if (node->uid == 0 && ref_uid != 0) { + /* make non-existing node uniquely identifiable */ + if (node->exists) { + mail_hash_transaction_set_corrupted(ctx->hash_trans, + "uid=0 found"); + ctx->failed = TRUE; + } else { + node->uid = ref_uid; + mail_hash_update(ctx->hash_trans, idx); + } + } + + /* keep message-ids cached */ + msgidp = array_idx_modifiable(&ctx->msgid_cache, idx); + if (*msgidp == NULL) + *msgidp = p_strdup(ctx->msgid_pool, msgid); + + *idx_r = idx; + return node; +} + +static void thread_msg_add(struct thread_context *ctx, uint32_t uid, + const char *msgid, uint32_t *idx_r) +{ + struct mail_thread_node *node; + struct mail_thread_node unode; + + if (msgid != NULL) { + node = thread_msgid_get(ctx, 0, msgid, idx_r); + if (!node->exists) { + /* add UID to node */ + node->uid = uid; + node->exists = TRUE; + } else { + /* duplicate, keep the original. if the original ever + gets expunged, rebuild. */ + node->expunge_rebuilds = TRUE; + msgid = NULL; + } + mail_hash_update(ctx->hash_trans, *idx_r); + } + + if (msgid == NULL) { + /* no valid message-id */ + memset(&unode, 0, sizeof(unode)); + unode.uid = uid; + unode.exists = TRUE; + mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r); + } +} + +static bool thread_node_has_ancestor(struct thread_context *ctx, + const struct mail_thread_node *node, + const struct mail_thread_node *ancestor) +{ + while (node != ancestor) { + if (node->parent_idx == 0) + return FALSE; + + node = mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx); + } + return TRUE; +} + +static void thread_link_reference(struct thread_context *ctx, + uint32_t parent_idx, uint32_t child_idx) +{ + struct mail_thread_node *node, *parent, *child, *orig_parent; + uint32_t idx; + + parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx); + child = mail_hash_lookup_idx(ctx->hash_trans, child_idx); + + parent->link_refcount++; + mail_hash_update(ctx->hash_trans, parent_idx); + + if (thread_node_has_ancestor(ctx, parent, child)) { + if (parent == child) { + /* loops to itself - ignore */ + return; + } + + /* child is an ancestor of parent. Adding child -> parent_node + would introduce a loop. If any messages referencing the path + between parent_node's parent and child_node get expunged, we + have to rebuild the tree because the loop might break. + For example: + + #1: a -> b (a.ref=1, b.ref=1) + #2: b -> a (a.ref=2, b.ref=2) + #3: c -> a -> b (a.ref=3, b.ref=3, c.ref=1) + + Expunging #3 wouldn't break the loop, but expunging #1 + would. */ + node = parent; + do { + idx = node->parent_idx; + if (idx == 0) { + /* earlier lookup_idx() failed */ + ctx->failed = TRUE; + break; + } + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + node->unref_rebuilds = TRUE; + mail_hash_update(ctx->hash_trans, idx); + } while (node != child); + return; + } else if (child->parent_idx == parent_idx) { + /* The same link already exists */ + return; + } + + /* Set parent_node as child_node's parent */ + orig_parent = child->parent_idx == 0 ? NULL : + mail_hash_lookup_idx(ctx->hash_trans, child->parent_idx); + if (thread_node_is_root(orig_parent)) { + child->parent_idx = parent_idx; + } else { + /* Conflicting parent already exists, keep the original */ + if (child->exists) { + /* If this message gets expunged, + the parent is changed. */ + child->expunge_rebuilds = TRUE; + } else { + /* Message doesn't exist, so it was one of the node's + children that created the original reference. If + that reference gets dropped, the parent is changed. + We could catch this in one of several ways: + + a) Parent node gets unreferenced + b) This node gets unreferenced + c) Any of the child nodes gets expunged + + b) is probably the least likely to happen, + so use it */ + child->unref_rebuilds = TRUE; + } + } + mail_hash_update(ctx->hash_trans, child_idx); +} + +static void +thread_link_references(struct thread_context *ctx, uint32_t ref_uid, + const char **references) +{ + const char *msgid, *last_msgid; + uint32_t parent_idx, child_idx; + + last_msgid = message_id_get_next(references); + if (last_msgid == NULL) + return; + (void)thread_msgid_get(ctx, ref_uid, last_msgid, &parent_idx); + + while ((msgid = message_id_get_next(references)) != NULL) { + (void)thread_msgid_get(ctx, ref_uid, msgid, &child_idx); + thread_link_reference(ctx, parent_idx, child_idx); + parent_idx = child_idx; + last_msgid = msgid; + } + + /* link the last ID to us */ + *references = last_msgid; +} + +static int thread_get_mail_header(struct mail *mail, const char *name, + const char **value_r) +{ + if (mail_get_first_header(mail, name, value_r) < 0) { + if (!mail->expunged) + return -1; + + /* Message is expunged. Instead of failing the entire THREAD + command, just treat the header as non-existing. */ + *value_r = NULL; + } + return 0; +} + +static bool references_are_crc32_unique(const char *references) +{ + const char *msgid; + uint32_t msgid_crc32; + ARRAY_TYPE(uint32_t) crc_arr; + const uint32_t *crc; + unsigned int i, count; + + t_array_init(&crc_arr, 32); + while ((msgid = message_id_get_next(&references)) != NULL) { + msgid_crc32 = crc32_str_nonzero(msgid); + crc = array_get(&crc_arr, &count); + for (i = 0; i < count; i++) { + if (crc[i] == msgid_crc32) + return FALSE; + } + array_append(&crc_arr, &msgid_crc32, 1); + } + return TRUE; +} + +int mail_thread_add(struct thread_context *ctx, struct mail *mail) +{ + const char *message_id, *in_reply_to, *references, *parent_msgid; + const struct mail_thread_node *parent, *old_parent; + struct mail_hash_header *hdr; + struct mail_thread_node *node; + uint32_t idx, parent_idx, ref_uid; + + hdr = mail_hash_get_header(ctx->hash_trans); + i_assert(mail->uid > hdr->last_uid); + hdr->last_uid = mail->uid; + hdr->message_count++; + + if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 || + thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0) + return -1; + + ref_uid = references_are_crc32_unique(references) ? mail->uid : 0; + thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx); + thread_link_references(ctx, ref_uid, &references); + + if (references != NULL) + parent_msgid = references; + else { + /* no valid IDs in References:, use In-Reply-To: instead */ + if (thread_get_mail_header(mail, HDR_IN_REPLY_TO, + &in_reply_to) < 0) + return -1; + parent_msgid = message_id_get_next(&in_reply_to); + } + parent = parent_msgid == NULL ? NULL : + thread_msgid_get(ctx, ref_uid, parent_msgid, &parent_idx); + + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + old_parent = node->parent_idx == 0 ? NULL : + mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx); + + if (!thread_node_is_root(old_parent) && + (parent == NULL || old_parent->parent_idx != parent_idx)) { + /* conflicting parent, remove it. */ + node->parent_idx = 0; + /* If this message gets expunged, we have to revert back to + the original parent. */ + node->expunge_rebuilds = TRUE; + mail_hash_update(ctx->hash_trans, idx); + } + if (parent != NULL) + thread_link_reference(ctx, parent_idx, idx); + return 0; +} + +static bool +mail_thread_node_lookup(struct thread_context *ctx, uint32_t uid, + uint32_t *idx_r, struct mail_thread_node **node_r) +{ + struct msgid_search_key key; + const char *msgids; + int ret; + + if (!mail_set_uid(ctx->tmp_mail, uid)) + return FALSE; + + ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, &msgids); + if (ret <= 0) + return FALSE; + + key.msgid = message_id_get_next(&msgids); + if (key.msgid == NULL) + return FALSE; + key.msgid_crc32 = crc32_str_nonzero(key.msgid); + + *node_r = mail_hash_lookup(ctx->hash_trans, &key, idx_r); + if (*node_r == NULL) + return FALSE; + + if ((*node_r)->uid != ctx->tmp_mail->uid) { + /* duplicate Message-ID probably */ + return FALSE; + } + return TRUE; +} + +static bool thread_unref_msgid(struct thread_context *ctx, const char *msgid) +{ + struct msgid_search_key key; + struct mail_thread_node *node; + uint32_t idx; + + key.msgid = msgid; + key.msgid_crc32 = crc32_str_nonzero(msgid); + + ctx->cmp_match_count = 0; + ctx->cmp_last_idx = 0; + + node = mail_hash_lookup(ctx->hash_trans, &key, &idx); + if (node == NULL) { + if (ctx->cmp_match_count != 1 || ctx->failed) { + /* couldn't find the message-id */ + return FALSE; + } + + /* there's only one key with this crc32 value, so it + must be what we're looking for */ + idx = ctx->cmp_last_idx; + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + } + if (node->unref_rebuilds) + return FALSE; + + node->link_refcount--; + if (!node->exists && node->link_refcount == 0) { + /* we turned into a root node */ + node->parent_idx = 0; + } + mail_hash_update(ctx->hash_trans, idx); + return TRUE; +} + +static bool +thread_unref_links(struct thread_context *ctx, const char *references, + bool *valid_r) +{ + const char *msgid; + + /* tmp_mail may be changed below, so we have to duplicate the + references string */ + references = t_strdup(references); + *valid_r = FALSE; + + while ((msgid = message_id_get_next(&references)) != NULL) { + *valid_r = TRUE; + if (!thread_unref_msgid(ctx, msgid)) + return FALSE; + } + return TRUE; +} + +int mail_thread_remove(struct thread_context *ctx, uint32_t uid) +{ + struct mail_hash_header *hdr; + struct mail_thread_node *node; + const char *references, *in_reply_to; + uint32_t idx; + bool have_refs; + + if (!mail_thread_node_lookup(ctx, uid, &idx, &node)) + return 0; + if (node->expunge_rebuilds) + return 0; + + if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES, + &references) < 0) + return -1; + + if (!thread_unref_links(ctx, references, &have_refs)) + return 0; + if (!have_refs) { + /* no valid IDs in References:, use In-Reply-To: instead */ + if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO, + &in_reply_to) < 0) + return -1; + in_reply_to = message_id_get_next(&in_reply_to); + if (in_reply_to != NULL) { + if (!thread_unref_msgid(ctx, in_reply_to)) + return 0; + } + } + + /* get the node again, the pointer may have changed */ + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + + node->uid = 0; + node->exists = FALSE; + if (node->link_refcount == 0) { + /* we turned into a root node */ + node->parent_idx = 0; + } + mail_hash_update(ctx->hash_trans, idx); + + hdr = mail_hash_get_header(ctx->hash_trans); + hdr->message_count--; + return 1; +} diff -r 882888286bf5 -r 929198b1f313 src/imap/imap-thread-private.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/imap/imap-thread-private.h Mon Jun 09 03:56:04 2008 +0300 @@ -0,0 +1,76 @@ +#ifndef IMAP_THREAD_PRIVATE_H +#define IMAP_THREAD_PRIVATE_H + +#include "crc32.h" +#include "mail-hash.h" +#include "imap-thread.h" + +#define HDR_MESSAGE_ID "message-id" +#define HDR_IN_REPLY_TO "in-reply-to" +#define HDR_REFERENCES "references" +#define HDR_SUBJECT "subject" + +struct msgid_search_key { + const char *msgid; + uint32_t msgid_crc32; +}; + +struct mail_thread_node { + struct mail_hash_record rec; + + /* exists=TRUE: UID of the mail this node belongs to + exists=FALSE: UID of some message that references (in References: or + In-Reply-To: header) this node. Of all the valid references exactly + one has the same CRC32 as this node's msgid_crc32. */ + uint32_t uid; + uint32_t parent_idx; + uint32_t msgid_crc32; + + uint32_t link_refcount:29; + uint32_t expunge_rebuilds:1; + uint32_t unref_rebuilds:1; + uint32_t exists:1; +}; + +struct thread_context { + struct mail *tmp_mail; + + struct mail_hash *hash; + struct mail_hash_transaction *hash_trans; + + /* Hash record idx -> Message-ID */ + ARRAY_DEFINE(msgid_cache, const char *); + pool_t msgid_pool; + + unsigned int cmp_match_count; + uint32_t cmp_last_idx; + + unsigned int failed:1; + unsigned int rebuild:1; + unsigned int syncing:1; +}; + +static inline bool thread_node_is_root(const struct mail_thread_node *node) +{ + if (node == NULL) + return TRUE; + + /* check also if expunging had changed this node to a root node */ + return !node->exists && node->link_refcount == 0; +} + +static inline uint32_t crc32_str_nonzero(const char *str) +{ + uint32_t value = crc32_str(str); + return value == 0 ? 1 : value; +} + +int mail_thread_add(struct thread_context *ctx, struct mail *mail); +int mail_thread_remove(struct thread_context *ctx, uint32_t seq); + +int mail_thread_finish(struct mail *tmp_mail, + struct mail_hash_transaction *hash_trans, + enum mail_thread_type thread_type, + struct ostream *output, bool id_is_uid); + +#endif diff -r 882888286bf5 -r 929198b1f313 src/imap/imap-thread.c --- a/src/imap/imap-thread.c Mon Jun 09 03:23:34 2008 +0300 +++ b/src/imap/imap-thread.c Mon Jun 09 03:56:04 2008 +0300 @@ -1,981 +1,622 @@ /* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */ -/* - * Merge sort code in sort_nodes() is copyright 2001 Simon Tatham. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* Implementation of draft-ietf-imapext-thread-12 threading algorithm */ +/* doc/thread-refs.txt describes the incremental algorithm we use here. */ #include "common.h" -#include "hash.h" +#include "array.h" #include "ostream.h" -#include "str.h" -#include "rfc822-parser.h" -#include "imap-base-subject.h" -#include "mail-storage.h" -#include "imap-thread.h" +#include "message-id.h" +#include "mail-search.h" +#include "imap-thread-private.h" -#include +/* FIXME: the mailbox accessing API needs some cleaning up. we shouldn't be + including all kinds of private headers */ +#include "../lib-storage/index/index-storage.h" +#include "mail-index-private.h" +#include "mail-index-sync-private.h" + +#define IMAP_THREAD_CONTEXT(obj) \ + MODULE_CONTEXT(obj, imap_thread_storage_module) /* how much memory to allocate initially. these are very rough approximations. */ -#define APPROX_MSG_COUNT 128 +#define APPROX_MSG_EXTRA_COUNT 10 #define APPROX_MSGID_SIZE 45 -/* Try to buffer this much data before sending it to output stream. */ -#define OUTPUT_BUF_SIZE 2048 +struct imap_thread_context { + struct thread_context thread_ctx; + struct client_command_context *cmd; + struct mailbox_transaction_context *t; + enum mail_thread_type thread_type; -#define NODE_IS_DUMMY(node) ((node)->id == 0) -#define NODE_HAS_PARENT(ctx, node) \ - ((node)->parent != NULL && (node)->parent != &(ctx)->root_node) + struct mail_search_context *search; + struct mail_search_arg tmp_search_arg; -struct root_info { - char *base_subject; - unsigned int reply:1; - unsigned int sorted:1; + unsigned int id_is_uid:1; }; -struct node { - struct node *parent, *first_child, *next; +struct imap_thread_mailbox { + union mailbox_module_context module_ctx; + struct mail_hash *hash; - unsigned int id; - time_t sent_date; - - union { - char *msgid; - struct root_info *info; - } u; + /* set only temporarily while needed */ + struct thread_context *ctx; }; -struct thread_context { - struct mail_search_context *search_ctx; - struct mailbox_transaction_context *t; - struct mailbox *box; - struct ostream *output; - struct mail *mail; +static void (*next_hook_mailbox_opened)(struct mailbox *box); + +static MODULE_CONTEXT_DEFINE_INIT(imap_thread_storage_module, + &mail_storage_module_register); + +static unsigned int mail_thread_hash_key(const void *key) +{ + const struct msgid_search_key *key_rec = key; + + return key_rec->msgid_crc32; +} + +static const char * +mail_thread_find_msgid_crc32(struct thread_context *ctx, uint32_t msgid_crc32) +{ + const char *msgids, *msgid; + int ret; + + /* if there are any valid references, it's in them */ + ret = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES, &msgids); + if (ret == 0) { + /* no References: header, fallback to In-Reply-To: */ + if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO, + &msgids) <= 0) + return NULL; + + msgid = message_id_get_next(&msgids); + if (msgid != NULL && crc32_str_nonzero(msgid) == msgid_crc32) + return msgid; + return NULL; + } + if (ret < 0) + return NULL; + + while ((msgid = message_id_get_next(&msgids)) != NULL) { + if (crc32_str_nonzero(msgid) == msgid_crc32) { + /* found it. there aren't any colliding message-id + CRC32s in this message or it wouldn't have been + added as a reference UID. */ + return msgid; + } + } + return NULL; + +} + +static const char * +mail_thread_node_get_msgid(struct thread_context *ctx, + const struct mail_thread_node *node, uint32_t idx) +{ + const char *msgids, *msgid, **p; + + p = array_idx_modifiable(&ctx->msgid_cache, idx); + if (*p != NULL) + return *p; + + if (node->uid == 0) + return NULL; + + if (mail_set_uid(ctx->tmp_mail, node->uid) <= 0) + return NULL; + if (node->exists) { + /* we can get the Message-ID directly */ + if (mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, + &msgids) <= 0) + return NULL; + + msgid = message_id_get_next(&msgids); + } else { + /* find from a referencing message */ + msgid = mail_thread_find_msgid_crc32(ctx, node->msgid_crc32); + } + + if (msgid == NULL) + return NULL; + + *p = p_strdup(ctx->msgid_pool, msgid); + return *p; +} + +static bool mail_thread_hash_cmp(struct mail_hash_transaction *trans, + const void *key, uint32_t idx, void *context) +{ + const struct msgid_search_key *key_rec = key; + struct imap_thread_mailbox *tbox = context; + struct thread_context *ctx = tbox->ctx; + const struct mail_thread_node *node; + const char *msgid; + + node = mail_hash_lookup_idx(trans, idx); + if (key_rec->msgid_crc32 != node->msgid_crc32) + return FALSE; + + ctx->cmp_match_count++; + ctx->cmp_last_idx = idx; - pool_t pool; - pool_t temp_pool; + /* either a match or a collision, need to look closer */ + msgid = mail_thread_node_get_msgid(ctx, node, ctx->cmp_last_idx); + if (msgid == NULL) { + /* we couldn't figure out the Message-ID for whatever reason. + we'll need to fallback to rebuilding the whole thread */ + ctx->rebuild = TRUE; + return FALSE; + } + return strcmp(msgid, key_rec->msgid) == 0; +} + +static unsigned int mail_thread_hash_rec(const void *p) +{ + const struct mail_thread_node *rec = p; + + return rec->msgid_crc32; +} + +static int +mail_thread_hash_remap(struct mail_hash_transaction *trans, + const uint32_t *map, unsigned int map_size, + void *context ATTR_UNUSED) +{ + const struct mail_hash_header *hdr; + struct mail_thread_node *node; + uint32_t idx; + + hdr = mail_hash_get_header(trans); + for (idx = 1; idx < hdr->record_count; idx++) { + node = mail_hash_lookup_idx(trans, idx); + i_assert(!MAIL_HASH_RECORD_IS_DELETED(&node->rec)); + + if (node->parent_idx >= map_size) { + mail_hash_transaction_set_corrupted(trans, + "invalid parent_idx"); + return -1; + } + + node->parent_idx = map[node->parent_idx]; + } + return 0; +} - struct hash_table *msgid_hash; - struct hash_table *subject_hash; +static bool +imap_thread_try_use_hash(struct imap_thread_context *ctx, + struct mail_hash *hash, + const struct mailbox_status *status, bool reset, + struct mail_search_args *search_args) +{ + struct mailbox *box = ctx->cmd->client->mailbox; + const struct mail_hash_header *hdr; + struct mail_hash_transaction *hash_trans; + uint32_t last_seq, last_uid, seq1, seq2; + bool can_use = TRUE, shared_lock = FALSE; + int try, ret; + + last_seq = status->messages; + last_uid = status->uidnext - 1; - struct node root_node; - size_t root_count; /* not exact after prune_dummy_messages() */ + /* Each search condition requires their own separate thread index. + Pretty much all the clients use only "search all" threading, so + we don't need to worry about anything else. */ + if (search_args->args->next != NULL) { + /* too difficult to figure out if we could optimize this. + we most likely couldn't. */ + return FALSE; + } else if (search_args->args->type == SEARCH_ALL) { + /* optimize */ + } else if (search_args->args->type == SEARCH_SEQSET) { + const struct seq_range *range; + unsigned int count; + + range = array_get(&search_args->args->value.seqset, &count); + if (count == 1 && range[0].seq1 == 1) { + /* If we're searching 1..n, we might be able to + optimize this. This is at least useful for testing + incremental index updates if nothing else. :) */ + last_seq = range[0].seq2; + last_uid = 0; + } else { + return FALSE; + } + } else { + return FALSE; + } + + for (try = 0;; try++) { + if ((ret = mail_hash_lock_shared(hash)) < 0) + return FALSE; + if (ret > 0) + break; + if (try == 5) { + /* enough tries */ + return FALSE; + } - bool id_is_uid; -}; + /* doesn't exist, create a new hash */ + if ((ret = mail_hash_create_excl_locked(hash)) < 0) + return FALSE; + if (ret > 0) { + ctx->thread_ctx.hash_trans = + mail_hash_transaction_begin(hash, + status->messages); + return TRUE; + } + } + +again: + hash_trans = mail_hash_transaction_begin(hash, status->messages); + hdr = mail_hash_get_header(hash_trans); + if (reset) + mail_hash_reset(hash_trans); + else if (hdr->last_uid > last_uid) { + /* thread index is newer than our current mailbox view, + can't optimize */ + can_use = FALSE; + } else if (hdr->message_count > last_seq) { + /* messages have been expunged, but not removed from + the thread index. we don't know their Message-IDs + anymore, so we have to rebuild the index. */ + mail_hash_reset(hash_trans); + } else if (hdr->message_count > 0) { + /* non-empty hash. add only the new messages in there. */ + mailbox_get_seq_range(box, 1, hdr->last_uid, &seq1, &seq2); + + if (seq2 != hdr->message_count || + hdr->uid_validity != status->uidvalidity) { + /* some messages have been expunged. have to rebuild. */ + mail_hash_reset(hash_trans); + } else { + /* after all these checks, this is the only case we + can actually optimize. */ + struct mail_search_arg *arg = &ctx->tmp_search_arg; -static void mail_thread_input(struct thread_context *ctx, struct mail *mail); -static void mail_thread_finish(struct thread_context *ctx); + arg->type = SEARCH_SEQSET; + p_array_init(&arg->value.seqset, ctx->cmd->pool, 1); + if (seq2 == last_seq) { + /* no need to update the index, + search nothing */ + shared_lock = TRUE; + } else { + /* search next+1..n */ + seq_range_array_add_range(&arg->value.seqset, + seq2 + 1, last_seq); + } + ctx->tmp_search_arg.next = search_args->args; + search_args->args = &ctx->tmp_search_arg; + } + } else { + /* empty hash - make sure anyway that it gets reset */ + mail_hash_reset(hash_trans); + } -static void mail_thread_deinit(struct thread_context *ctx) + if (can_use && !shared_lock) { + mail_hash_transaction_end(&hash_trans); + mail_hash_unlock(hash); + if (mail_hash_lock_exclusive(hash, + MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0) + return FALSE; + shared_lock = TRUE; + goto again; + } + if (!can_use) { + mail_hash_transaction_end(&hash_trans); + mail_hash_unlock(hash); + return FALSE; + } else { + ctx->thread_ctx.hash_trans = hash_trans; + return TRUE; + } +} + +static void +imap_thread_context_init(struct imap_thread_mailbox *tbox, + struct imap_thread_context *ctx, + struct mail_search_args *search_args, bool reset) { - if (ctx->msgid_hash != NULL) - hash_destroy(&ctx->msgid_hash); - if (ctx->subject_hash != NULL) - hash_destroy(&ctx->subject_hash); + struct mailbox *box = ctx->cmd->client->mailbox; + struct mail_hash *hash = NULL; + struct mailbox_status status; + const struct mail_hash_header *hdr; + unsigned int count; + + mailbox_get_status(box, STATUS_MESSAGES | STATUS_UIDNEXT, &status); + if (imap_thread_try_use_hash(ctx, tbox->hash, &status, + reset, search_args)) + hash = tbox->hash; + else { + /* fallback to using in-memory hash */ + struct index_mailbox *ibox = (struct index_mailbox *)box; + + hash = mail_hash_alloc(ibox->index, NULL, + sizeof(struct mail_thread_node), + mail_thread_hash_key, + mail_thread_hash_rec, + mail_thread_hash_cmp, + mail_thread_hash_remap, + tbox); + if (mail_hash_lock_exclusive(hash, + MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0) + i_unreached(); + ctx->thread_ctx.hash_trans = + mail_hash_transaction_begin(hash, 0); + } + ctx->thread_ctx.hash = hash; + + /* initialize searching */ + ctx->t = mailbox_transaction_begin(box, 0); + ctx->search = mailbox_search_init(ctx->t, search_args, NULL); + ctx->thread_ctx.tmp_mail = mail_alloc(ctx->t, 0, NULL); + + hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans); + count = status.messages < hdr->record_count ? 0 : + status.messages - hdr->record_count; + count += APPROX_MSG_EXTRA_COUNT; + ctx->thread_ctx.msgid_pool = + pool_alloconly_create(MEMPOOL_GROWING"msgids", + count * APPROX_MSGID_SIZE); + i_array_init(&ctx->thread_ctx.msgid_cache, + I_MAX(hdr->record_count, status.messages)); +} + +static int imap_thread_finish(struct imap_thread_mailbox *tbox, + struct imap_thread_context *ctx) +{ + int ret; + + mail_hash_transaction_end(&ctx->thread_ctx.hash_trans); + + ret = mailbox_search_deinit(&ctx->search); + mail_free(&ctx->thread_ctx.tmp_mail); + if (mailbox_transaction_commit(&ctx->t) < 0) + ret = -1; - pool_unref(&ctx->temp_pool); - pool_unref(&ctx->pool); + mail_hash_unlock(ctx->thread_ctx.hash); + if (ctx->thread_ctx.hash != tbox->hash) + mail_hash_free(&ctx->thread_ctx.hash); + + array_free(&ctx->thread_ctx.msgid_cache); + pool_unref(&ctx->thread_ctx.msgid_pool); + return ret; +} + +static int imap_thread_run(struct imap_thread_context *ctx) +{ + static const char *wanted_headers[] = { + HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES, HDR_SUBJECT, + NULL + }; + struct mailbox *box = mailbox_transaction_get_mailbox(ctx->t); + struct mailbox_header_lookup_ctx *headers_ctx; + struct mail_hash_header *hdr; + struct mail *mail; + bool changed = FALSE; + uint32_t prev_uid; + int ret = 0; + + hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans); + prev_uid = hdr->last_uid; + + headers_ctx = mailbox_header_lookup_init(box, wanted_headers); + mail = mail_alloc(ctx->t, MAIL_FETCH_DATE, headers_ctx); + while (ret == 0 && + mailbox_search_next(ctx->search, mail) > 0) { + i_assert(mail->uid > prev_uid); + prev_uid = mail->uid; + changed = TRUE; + + T_BEGIN { + ret = mail_thread_add(&ctx->thread_ctx, mail); + } T_END; + } + mail_free(&mail); + mailbox_header_lookup_deinit(&headers_ctx); + + if (ret < 0 || ctx->thread_ctx.failed || ctx->thread_ctx.rebuild) { + mail_storage_set_internal_error(box->storage); + return -1; + } + + if (changed) { + /* even if write failed, we can still finish the thread + building */ + (void)mail_hash_transaction_write(ctx->thread_ctx.hash_trans); + } + + if (mail_thread_finish(ctx->thread_ctx.tmp_mail, + ctx->thread_ctx.hash_trans, + ctx->thread_type, ctx->cmd->client->output, + ctx->cmd->uid) < 0) { + mail_storage_set_internal_error(box->storage); + return -1; + } + return 0; } int imap_thread(struct client_command_context *cmd, struct mail_search_args *args, enum mail_thread_type type) { - static const char *wanted_headers[] = { - "message-id", "in-reply-to", "references", "subject", - NULL - }; - struct client *client = cmd->client; - struct mailbox_header_lookup_ctx *headers_ctx; - struct thread_context *ctx; - struct mail *mail; - int ret; - - if (type != MAIL_THREAD_REFERENCES) - i_fatal("Only REFERENCES threading supported"); - - ctx = t_new(struct thread_context, 1); - - /* initialize searching */ - ctx->t = mailbox_transaction_begin(client->mailbox, 0); - ctx->search_ctx = mailbox_search_init(ctx->t, args, NULL); - - ctx->box = client->mailbox; - ctx->output = client->output; - ctx->pool = pool_alloconly_create("thread_context", - sizeof(struct node) * - APPROX_MSG_COUNT); - ctx->temp_pool = pool_alloconly_create("thread_context temp", - APPROX_MSG_COUNT * - APPROX_MSGID_SIZE); - ctx->msgid_hash = hash_create(default_pool, ctx->temp_pool, - APPROX_MSG_COUNT*2, str_hash, - (hash_cmp_callback_t *)strcmp); - ctx->id_is_uid = cmd->uid; - - headers_ctx = mailbox_header_lookup_init(client->mailbox, - wanted_headers); - mail = mail_alloc(ctx->t, MAIL_FETCH_DATE, headers_ctx); - while (mailbox_search_next(ctx->search_ctx, mail) > 0) { - T_BEGIN { - mail_thread_input(ctx, mail); - } T_END; - } - - mail_free(&mail); - - o_stream_send_str(client->output, "* THREAD"); - mail_thread_finish(ctx); - o_stream_send_str(client->output, "\r\n"); - - ret = mailbox_search_deinit(&ctx->search_ctx); - if (mailbox_transaction_commit(&ctx->t) < 0) - ret = -1; - - mailbox_header_lookup_deinit(&headers_ctx); - mail_thread_deinit(ctx); - return ret; -} + struct imap_thread_mailbox *tbox = + IMAP_THREAD_CONTEXT(cmd->client->mailbox); + struct imap_thread_context *ctx; + int ret, try; -static void add_root(struct thread_context *ctx, struct node *node) -{ - node->parent = &ctx->root_node; - node->next = ctx->root_node.first_child; - ctx->root_node.first_child = node; - - ctx->root_count++; -} - -static struct node *create_node(struct thread_context *ctx, const char *msgid) -{ - struct node *node; - - node = p_new(ctx->pool, struct node, 1); - node->u.msgid = p_strdup(ctx->temp_pool, msgid); - - hash_insert(ctx->msgid_hash, node->u.msgid, node); - return node; -} - -static struct node *create_id_node(struct thread_context *ctx, - unsigned int id, time_t sent_date) -{ - struct node *node; - - node = p_new(ctx->pool, struct node, 1); - node->id = id; - node->sent_date = sent_date; - - add_root(ctx, node); - return node; -} + i_assert(tbox->ctx == NULL); + i_assert(type == MAIL_THREAD_REFERENCES || + type == MAIL_THREAD_REFERENCES2); -static struct node *update_message(struct thread_context *ctx, - const char *msgid, time_t sent_date, - unsigned int id) -{ - struct node *node; - - if (msgid == NULL) - return create_id_node(ctx, id, sent_date); - - node = hash_lookup(ctx->msgid_hash, msgid); - if (node == NULL) { - /* first time we see this message */ - node = create_node(ctx, msgid); - node->id = id; - node->sent_date = sent_date; - return node; - } - - if (node->id == 0) { - /* seen before in references */ - node->id = id; - node->sent_date = sent_date; - } else { - /* duplicate */ - node = create_id_node(ctx, id, sent_date); - } - - return node; -} - -static bool get_untokenized_msgid(const char **msgid_p, string_t *msgid) -{ - struct rfc822_parser_context parser; + ctx = t_new(struct imap_thread_context, 1); + tbox->ctx = &ctx->thread_ctx; - rfc822_parser_init(&parser, (const unsigned char *)*msgid_p, - strlen(*msgid_p), NULL); - - /* - msg-id = [CFWS] "<" id-left "@" id-right ">" [CFWS] - id-left = dot-atom-text / no-fold-quote / obs-id-left - id-right = dot-atom-text / no-fold-literal / obs-id-right - no-fold-quote = DQUOTE *(qtext / quoted-pair) DQUOTE - no-fold-literal = "[" *(dtext / quoted-pair) "]" - */ - - (void)rfc822_skip_lwsp(&parser); - - if (rfc822_parse_dot_atom(&parser, msgid) <= 0) - return FALSE; - - if (*parser.data != '@') - return FALSE; - parser.data++; - (void)rfc822_skip_lwsp(&parser); - - if (rfc822_parse_dot_atom(&parser, msgid) <= 0) - return FALSE; - - if (*parser.data != '>') - return FALSE; - - *msgid_p = (const char *)parser.data + 1; - return TRUE; -} - -static void strip_lwsp(char *str) -{ - /* @UNSAFE */ - char *dest; - - /* find the first lwsp */ - while (*str != ' ' && *str != '\t' && *str != '\r' && *str != '\n') { - if (*str == '\0') - return; - str++; - } - - for (dest = str; *str != '\0'; str++) { - if (*str != ' ' && *str != '\t' && *str != '\r' && *str != '\n') - *dest++ = *str; - } - *dest = '\0'; -} - -static const char *get_msgid(const char **msgid_p) -{ - const char *msgid = *msgid_p; - const char *p; - string_t *str = NULL; - bool found_at; - - if (*msgid_p == NULL) - return NULL; + for (try = 0; try < 2; try++) { + ctx->thread_type = type; + ctx->cmd = cmd; + imap_thread_context_init(tbox, ctx, args, try == 1); + ret = imap_thread_run(ctx); + if (imap_thread_finish(tbox, ctx) < 0) + ret = -1; - for (;;) { - /* skip until '<' */ - while (*msgid != '<') { - if (*msgid == '\0') { - *msgid_p = msgid; - return NULL; - } - msgid++; - } - msgid++; - - /* check it through quickly to see if it's already normalized */ - p = msgid; found_at = FALSE; - for (;; p++) { - if ((unsigned char)*p >= 'A') /* matches most */ - continue; - - if (*p == '@') - found_at = TRUE; - if (*p == '>' || *p == '"' || *p == '(' || *p == '[') - break; - - if (*p == '\0') { - *msgid_p = p; - return NULL; - } - } - - if (*p == '>') { - *msgid_p = p+1; - if (found_at) { - char *s; - - s = p_strdup_until(unsafe_data_stack_pool, - msgid, p); - strip_lwsp(s); - return s; - } + if (ret < 0 && ctx->thread_ctx.hash == tbox->hash) { + /* try again with in-memory hash */ + memset(ctx, 0, sizeof(*ctx)); } else { - /* ok, do it the slow way */ - *msgid_p = msgid; - - if (str == NULL) { - /* allocate only once, so we don't leak - with multiple invalid message IDs */ - str = t_str_new(256); - } - if (get_untokenized_msgid(msgid_p, str)) - return str_c(str); - } - - /* invalid message id, see if there's another valid one */ - msgid = *msgid_p; - } -} - -static void unlink_child(struct thread_context *ctx, - struct node *child, bool add_to_root) -{ - struct node **node; - - node = &child->parent->first_child; - for (; *node != NULL; node = &(*node)->next) { - if (*node == child) { - *node = child->next; break; } } - child->next = NULL; - if (!add_to_root) - child->parent = NULL; - else - add_root(ctx, child); -} - -static bool find_parent(struct node *node, struct node *parent) -{ - while (node != NULL) { - if (node == parent) - return TRUE; - node = node->parent; - } - return FALSE; -} - -static void link_node(struct thread_context *ctx, const char *parent_msgid, - struct node *child, bool replace) -{ - struct node *parent, **node; - - if (NODE_HAS_PARENT(ctx, child) && !replace) { - /* already got a parent, don't want to replace it */ - return; - } - - parent = hash_lookup(ctx->msgid_hash, parent_msgid); - if (parent == NULL) - parent = create_node(ctx, parent_msgid); - - if (child->parent == parent) { - /* already have this parent, ignore */ - return; - } - - if (child->parent != NULL) - unlink_child(ctx, child, FALSE); - - if (find_parent(parent, child)) { - /* this would create a loop, not allowed */ - return; - } - - /* link them */ - child->parent = parent; - - node = &parent->first_child; - while (*node != NULL) - node = &(*node)->next; - *node = child; -} - -static void link_message(struct thread_context *ctx, - const char *parent_msgid, const char *child_msgid, - bool replace) -{ - struct node *child; - - child = hash_lookup(ctx->msgid_hash, child_msgid); - if (child == NULL) - child = create_node(ctx, child_msgid); - - link_node(ctx, parent_msgid, child, replace); -} - -static bool link_references(struct thread_context *ctx, - struct node *node, const char *references) -{ - const char *parent_id, *child_id; - - parent_id = get_msgid(&references); - if (parent_id == NULL) - return FALSE; - - while ((child_id = get_msgid(&references)) != NULL) { - link_message(ctx, parent_id, child_id, FALSE); - parent_id = child_id; - } - - /* link the last message to us */ - link_node(ctx, parent_id, node, TRUE); - return TRUE; -} - -static void mail_thread_input(struct thread_context *ctx, struct mail *mail) -{ - const char *refid, *message_id, *in_reply_to, *references; - struct node *node; - time_t sent_date; - - if (mail_get_date(mail, &sent_date, NULL) < 0) - sent_date = 0; - if (sent_date == 0) { - if (mail_get_received_date(mail, &sent_date) < 0) - sent_date = 0; - } - - if (mail_get_first_header(mail, "message-id", &message_id) < 0) - message_id = NULL; - node = update_message(ctx, get_msgid(&message_id), sent_date, - ctx->id_is_uid ? mail->uid : mail->seq); - - /* link references */ - if (mail_get_first_header(mail, "references", &references) < 0) - references = NULL; - - if (!link_references(ctx, node, references)) { - if (mail_get_first_header(mail, "in-reply-to", - &in_reply_to) <= 0) - refid = NULL; - else - refid = get_msgid(&in_reply_to); - - if (refid != NULL) - link_node(ctx, refid, node, TRUE); - else { - /* no references, make sure it's not linked */ - if (node != NULL && NODE_HAS_PARENT(ctx, node)) - unlink_child(ctx, node, TRUE); - } - } -} - -static struct node *find_last_child(struct node *node) -{ - node = node->first_child; - while (node->next != NULL) - node = node->next; - - return node; -} - -static struct node **promote_children(struct node **parent) -{ - struct node *new_parent, *old_parent, *child; - - old_parent = *parent; - new_parent = old_parent->parent; - - child = old_parent->first_child; - *parent = child; - - for (;;) { - child->parent = new_parent; - if (child->next == NULL) - break; - child = child->next; - } - - child->next = old_parent->next; - return &child->next; + i_assert(!tbox->ctx->syncing); + tbox->ctx = NULL; + return ret; } -static void prune_dummy_messages(struct thread_context *ctx, - struct node **node_p) -{ - struct node **a; - - a = node_p; - while (*node_p != NULL) { - if ((*node_p)->first_child != NULL) - prune_dummy_messages(ctx, &(*node_p)->first_child); - - if (NODE_IS_DUMMY(*node_p)) { - if ((*node_p)->first_child == NULL) { - /* no children -> delete */ - *node_p = (*node_p)->next; - continue; - } else if (NODE_HAS_PARENT(ctx, *node_p) || - (*node_p)->first_child->next == NULL) { - /* promote children to our level, - deleting the dummy node */ - node_p = promote_children(node_p); - continue; - } - } - - node_p = &(*node_p)->next; - } -} - -static int node_cmp(struct node *a, struct node *b) +static bool imap_thread_index_is_up_to_date(struct thread_context *ctx, + struct mail_index_view *view) { - time_t date_a, date_b; - unsigned int id_a, id_b; - - date_a = a->id != 0 ? a->sent_date : a->first_child->sent_date; - date_b = b->id != 0 ? b->sent_date : b->first_child->sent_date; - - if (date_a != date_b && date_a != 0 && date_b != 0) - return date_a < date_b ? -1 : 1; - - id_a = a->id != 0 ? a->id : a->first_child->id; - id_b = b->id != 0 ? b->id : b->first_child->id; - return id_a < id_b ? -1 : 1; -} - -static struct node *sort_nodes(struct node *list) -{ - struct node *p, *q, *e, *tail; - size_t insize, nmerges, psize, qsize, i; - - i_assert(list != NULL); - - if (list->next == NULL) - return list; /* just one node */ - - insize = 1; - - for (;;) { - p = list; - list = NULL; - tail = NULL; - - nmerges = 0; /* count number of merges we do in this pass */ - while (p != 0) { - nmerges++; /* there exists a merge to be done */ - - /* step `insize' places along from p */ - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next; - if (q == NULL) break; - } + const struct mail_index_header *hdr; + struct mail_hash_header *hash_hdr; + uint32_t seq1, seq2; - /* if q hasn't fallen off end, we have two lists to - merge */ - qsize = insize; - - /* now we have two lists; merge them */ - while (psize > 0 || (qsize > 0 && q != NULL)) { - /* decide whether next element of merge comes - from p or q */ - if (psize == 0) { - /* p is empty; e must come from q. */ - e = q; q = q->next; qsize--; - } else if (qsize == 0 || !q) { - /* q is empty; e must come from p. */ - e = p; p = p->next; psize--; - } else if (node_cmp(p, q) <= 0) { - /* First element of p is lower - (or same); e must come from p. */ - e = p; p = p->next; psize--; - } else { - /* First element of q is lower; - e must come from q. */ - e = q; q = q->next; qsize--; - } - - /* add the next element to the merged list */ - if (tail) - tail->next = e; - else - list = e; - tail = e; - } - - /* now p has stepped `insize' places along, - and q has too */ - p = q; - } - tail->next = NULL; - - /* If we have done only one merge, we're finished. */ - if (nmerges <= 1) { - /* allow for nmerges == 0, the empty list case */ - return list; - } - - /* Otherwise repeat, merging lists twice the size */ - insize *= 2; - } -} - -static void add_base_subject(struct thread_context *ctx, - const char *subject, struct node *node) -{ - struct node *hash_node; - char *hash_subject; - void *key, *value; - bool is_reply_or_forward; - - if (subject == NULL) - return; - - subject = imap_get_base_subject_cased(pool_datastack_create(), subject, - &is_reply_or_forward); - if (*subject == '\0') - return; - - if (!hash_lookup_full(ctx->subject_hash, subject, &key, &value)) { - hash_subject = p_strdup(ctx->temp_pool, subject); - hash_insert(ctx->subject_hash, hash_subject, node); - } else { - hash_subject = key; - hash_node = value; - - if (!NODE_IS_DUMMY(hash_node) && - (NODE_IS_DUMMY(node) || - (hash_node->u.info->reply && !is_reply_or_forward))) - hash_update(ctx->subject_hash, hash_subject, node); + hdr = mail_index_get_header(view); + hash_hdr = mail_hash_get_header(ctx->hash_trans); + if (hash_hdr->last_uid + 1 == hdr->next_uid) { + /* all messages have been added to hash */ + return hash_hdr->message_count == hdr->messages_count; } - node->u.info->base_subject = hash_subject; - node->u.info->reply = is_reply_or_forward; + if (!mail_index_lookup_seq_range(view, 1, hash_hdr->last_uid, + &seq1, &seq2)) + seq2 = 0; + return seq2 == hash_hdr->message_count; } -static void gather_base_subjects(struct thread_context *ctx) +static int +imap_thread_expunge_handler(struct mail_index_sync_map_ctx *sync_ctx, + uint32_t seq, const void *data, + void **sync_context ATTR_UNUSED, void *context) { - static const char *wanted_headers[] = { "subject", NULL }; - struct mailbox_header_lookup_ctx *headers_ctx; - struct node *node; - const char *subject; - unsigned int id; - uint32_t seq; + struct mailbox *box = context; + struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(box); + struct thread_context *ctx = tbox->ctx; + struct mailbox_transaction_context *t; + uint32_t uid; - ctx->subject_hash = - hash_create(default_pool, ctx->temp_pool, ctx->root_count * 2, - str_hash, (hash_cmp_callback_t *)strcmp); + if (data == NULL) { + /* deinit */ + if (!ctx->syncing) + return 0; + if (ctx->hash != NULL) { + t = ctx->tmp_mail->transaction; - headers_ctx = mailbox_header_lookup_init(ctx->box, wanted_headers); - ctx->mail = mail_alloc(ctx->t, 0, headers_ctx); + if (!ctx->failed) + (void)mail_hash_transaction_write(ctx->hash_trans); + mail_hash_transaction_end(&ctx->hash_trans); + mail_hash_unlock(tbox->hash); - node = ctx->root_node.first_child; - for (; node != NULL; node = node->next) { - if (!NODE_IS_DUMMY(node)) - id = node->id; - else { - /* sort children, use the first one's id */ - node->first_child = sort_nodes(node->first_child); - id = node->first_child->id; - - node->u.info->sorted = TRUE; + mail_free(&ctx->tmp_mail); + /* don't commit. we're in the middle of syncing and + this transaction isn't marked as external. */ + (void)mailbox_transaction_rollback(&t); + array_free(&ctx->msgid_cache); + pool_unref(&ctx->msgid_pool); } + i_free_and_null(tbox->ctx); + return 0; + } + if (ctx == NULL) { + /* init */ + if (tbox->ctx != NULL) { + /* we're already in the middle of threading */ + return 0; + } + tbox->ctx = ctx = i_new(struct thread_context, 1); + ctx->syncing = TRUE; - if (!ctx->id_is_uid) - seq = id; - else - mailbox_get_seq_range(ctx->box, id, id, &seq, &seq); + /* we can't wait the lock in here or we could deadlock. */ + if (mail_hash_lock_exclusive(tbox->hash, + MAIL_HASH_LOCK_FLAG_TRY) <= 0) + return 0; + + ctx->hash = tbox->hash; + ctx->hash_trans = mail_hash_transaction_begin(ctx->hash, 0); + ctx->msgid_pool = pool_alloconly_create(MEMPOOL_GROWING"msgids", + 20 * APPROX_MSGID_SIZE); + i_array_init(&ctx->msgid_cache, 20); - if (seq != 0) { - mail_set_seq(ctx->mail, seq); - if (mail_get_first_header(ctx->mail, "subject", - &subject) > 0) { - T_BEGIN { - add_base_subject(ctx, subject, node); - } T_END; - } + t = mailbox_transaction_begin(box, 0); + ctx->tmp_mail = mail_alloc(t, 0, NULL); + + if (!imap_thread_index_is_up_to_date(ctx, sync_ctx->view)) { + ctx->failed = TRUE; + return 0; } + } else { + if (ctx->hash == NULL) { + /* locking had failed */ + return 0; + } + if (!ctx->syncing || ctx->failed) + return 0; } - mail_free(&ctx->mail); - mailbox_header_lookup_deinit(&headers_ctx); -} - -static void reset_children_parent(struct node *parent) -{ - struct node *node; - - for (node = parent->first_child; node != NULL; node = node->next) - node->parent = parent; + T_BEGIN { + mail_index_lookup_uid(sync_ctx->view, seq, &uid); + if (mail_thread_remove(ctx, uid) <= 0) + ctx->failed = TRUE; + } T_END; + return 0; } -static void merge_subject_threads(struct thread_context *ctx) +static int imap_thread_mailbox_close(struct mailbox *box) { - struct node **node_p, *node, *hash_node; - char *base_subject; - - for (node_p = &ctx->root_node.first_child; *node_p != NULL; ) { - node = *node_p; - - if (node->u.info == NULL) { - /* deleted node */ - *node_p = node->next; - continue; - } - - /* (ii) If the thread subject is empty, skip this message. */ - base_subject = node->u.info->base_subject; - if (base_subject == NULL) { - node_p = &node->next; - continue; - } - - /* (iii) Lookup the message associated with this thread - subject in the subject table. */ - hash_node = hash_lookup(ctx->subject_hash, base_subject); - i_assert(hash_node != NULL); - - /* (iv) If the message in the subject table is the current - message, skip this message. */ - if (hash_node == node) { - node_p = &node->next; - continue; - } - - /* Otherwise, merge the current message with the one in the - subject table using the following rules: */ - - if (NODE_IS_DUMMY(node) && - NODE_IS_DUMMY(hash_node)) { - /* If both messages are dummies, append the current - message's children to the children of the message in - the subject table (the children of both messages - become siblings), and then delete the current - message. */ - find_last_child(hash_node)->next = node->first_child; - - *node_p = node->next; - hash_node->u.info->sorted = FALSE; - } else if (NODE_IS_DUMMY(hash_node) || - (node->u.info->reply && !hash_node->u.info->reply)) { - /* If the message in the subject table is a dummy - and the current message is not, make the current - message a child of the message in the subject table - (a sibling of its children). - - If the current message is a reply or forward and - the message in the subject table is not, make the - current message a child of the message in the - subject table (a sibling of its children). */ - *node_p = node->next; + struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(box); + int ret; - node->parent = hash_node; - node->next = hash_node->first_child; - hash_node->first_child = node; - - hash_node->u.info->sorted = FALSE; - } else { - /* Otherwise, create a new dummy message and make both - the current message and the message in the subject - table children of the dummy. Then replace the - message in the subject table with the dummy - message. */ - - /* create new nodes for the children - reusing - existing ones have problems since the other one - might have been handled already and we'd introduce - loops.. - - current node will be destroyed, hash_node will be - the dummy so we don't need to update hash */ - struct node *node1, *node2; - - node1 = p_new(ctx->pool, struct node, 1); - node2 = p_new(ctx->pool, struct node, 1); - - memcpy(node1, node, sizeof(struct node)); - memcpy(node2, hash_node, sizeof(struct node)); - - node1->parent = hash_node; - node2->parent = hash_node; - node1->next = node2; - node2->next = NULL; + i_assert(tbox->ctx == NULL); - reset_children_parent(node1); - reset_children_parent(node2); - - hash_node->id = 0; - hash_node->first_child = node1; - hash_node->u.info->reply = FALSE; - hash_node->u.info->sorted = FALSE; - - node->first_child = NULL; - node->u.info = NULL; - *node_p = node->next; - } - } -} - -static void sort_root_nodes(struct thread_context *ctx) -{ - struct node *node; - - /* sort the children first, they're needed to sort dummy root nodes */ - node = ctx->root_node.first_child; - for (; node != NULL; node = node->next) { - if (node->u.info == NULL) - continue; - - if (NODE_IS_DUMMY(node) && !node->u.info->sorted && - node->first_child != NULL) - node->first_child = sort_nodes(node->first_child); - } - - ctx->root_node.first_child = sort_nodes(ctx->root_node.first_child); + mail_hash_free(&tbox->hash); + ret = tbox->module_ctx.super.close(box); + i_free(tbox); + return ret; } -static bool send_nodes(struct thread_context *ctx, - string_t *str, struct node *node) +static void imap_thread_mailbox_opened(struct mailbox *box) { - if (node->next == NULL && NODE_HAS_PARENT(ctx, node)) { - /* no siblings - special case to avoid extra paranthesis */ - if (node->first_child == NULL) - str_printfa(str, "%u", node->id); - else { - str_printfa(str, "%u ", node->id); - send_nodes(ctx, str, sort_nodes(node->first_child)); - } - return TRUE; - } + struct index_mailbox *ibox = (struct index_mailbox *)box; + struct imap_thread_mailbox *tbox; + uint32_t ext_id; + + if (next_hook_mailbox_opened != NULL) + next_hook_mailbox_opened(box); + + tbox = i_new(struct imap_thread_mailbox, 1); + tbox->module_ctx.super = box->v; + box->v.close = imap_thread_mailbox_close; - while (node != NULL) { - if (str_len(str) + MAX_INT_STRLEN*2 + 3 >= OUTPUT_BUF_SIZE) { - /* string getting full, flush it */ - if (o_stream_send(ctx->output, - str_data(str), str_len(str)) < 0) - return FALSE; - str_truncate(str, 0); - } + tbox->hash = mail_hash_alloc(ibox->index, ".thread", + sizeof(struct mail_thread_node), + mail_thread_hash_key, + mail_thread_hash_rec, + mail_thread_hash_cmp, + mail_thread_hash_remap, + tbox); - if (node->first_child == NULL) - str_printfa(str, "(%u)", node->id); - else { - str_printfa(str, "(%u ", node->id); - send_nodes(ctx, str, sort_nodes(node->first_child)); - str_append_c(str, ')'); - } + ext_id = mail_index_ext_register(ibox->index, "thread", 0, 0, 0); + mail_index_register_expunge_handler(ibox->index, ext_id, TRUE, + imap_thread_expunge_handler, box); - node = node->next; - } - return TRUE; + MODULE_CONTEXT_SET(box, imap_thread_storage_module, tbox); } -static void send_roots(struct thread_context *ctx) +void imap_thread_init(void) { - struct node *node; - string_t *str; - - str = t_str_new(OUTPUT_BUF_SIZE); - str_append_c(str, ' '); - - /* sort root nodes again, they have been modified since the last time */ - sort_root_nodes(ctx); - - node = ctx->root_node.first_child; - for (; node != NULL; node = node->next) { - if (node->u.info == NULL) - continue; - - if (str_len(str) + MAX_INT_STRLEN*2 + 3 >= OUTPUT_BUF_SIZE) { - /* string getting full, flush it */ - if (o_stream_send(ctx->output, - str_data(str), str_len(str)) < 0) - return; - str_truncate(str, 0); - } - - str_append_c(str, '('); - if (!NODE_IS_DUMMY(node)) - str_printfa(str, "%u", node->id); - - if (node->first_child != NULL) { - if (!NODE_IS_DUMMY(node)) - str_append_c(str, ' '); - - if (!node->u.info->sorted) { - node->first_child = - sort_nodes(node->first_child); - } - - if (!send_nodes(ctx, str, node->first_child)) - return; - } - - str_append_c(str, ')'); - } - - (void)o_stream_send(ctx->output, str_data(str), str_len(str)); + next_hook_mailbox_opened = hook_mailbox_opened; + hook_mailbox_opened = imap_thread_mailbox_opened; } -static void mail_thread_finish(struct thread_context *ctx) +void imap_thread_deinit(void) { - struct hash_iterate_context *iter; - void *key, *value; - struct node *node; - - /* (2) save root nodes and drop the msgids */ - iter = hash_iterate_init(ctx->msgid_hash); - while (hash_iterate(iter, &key, &value)) { - struct node *node = value; - - if (node->parent == NULL) - add_root(ctx, node); - } - hash_iterate_deinit(&iter); - - /* drop the memory allocated for message-IDs and msgid_hash, - reuse their memory for base subjects */ - hash_destroy(&ctx->msgid_hash); - p_clear(ctx->temp_pool); - - if (ctx->root_node.first_child == NULL) { - /* no messages */ - return; - } - - /* (3) */ - prune_dummy_messages(ctx, &ctx->root_node.first_child); - - /* initialize the node->u.info for all root nodes */ - node = ctx->root_node.first_child; - for (; node != NULL; node = node->next) - node->u.info = p_new(ctx->pool, struct root_info, 1); - - /* (4) */ - sort_root_nodes(ctx); - - /* (5) Gather together messages under the root that have the same - base subject text. */ - gather_base_subjects(ctx); - - /* (5.C) Merge threads with the same thread subject. */ - merge_subject_threads(ctx); - - /* (6) Sort and send replies */ - T_BEGIN { - send_roots(ctx); - } T_END; + i_assert(hook_mailbox_opened == imap_thread_mailbox_opened); + hook_mailbox_opened = next_hook_mailbox_opened; } diff -r 882888286bf5 -r 929198b1f313 src/imap/imap-thread.h --- a/src/imap/imap-thread.h Mon Jun 09 03:23:34 2008 +0300 +++ b/src/imap/imap-thread.h Mon Jun 09 03:56:04 2008 +0300 @@ -4,10 +4,14 @@ enum mail_thread_type { MAIL_THREAD_NONE, MAIL_THREAD_ORDEREDSUBJECT, - MAIL_THREAD_REFERENCES + MAIL_THREAD_REFERENCES, + MAIL_THREAD_REFERENCES2 }; int imap_thread(struct client_command_context *cmd, struct mail_search_args *args, enum mail_thread_type type); +void imap_thread_init(void); +void imap_thread_deinit(void); + #endif diff -r 882888286bf5 -r 929198b1f313 src/imap/main.c --- a/src/imap/main.c Mon Jun 09 03:23:34 2008 +0300 +++ b/src/imap/main.c Mon Jun 09 03:56:04 2008 +0300 @@ -204,6 +204,7 @@ mailbox_list_register_all(); clients_init(); commands_init(); + imap_thread_init(); module_dir_init(modules); @@ -256,6 +257,7 @@ clients_deinit(); module_dir_unload(&modules); + imap_thread_deinit(); commands_deinit(); mail_storage_deinit(); dict_driver_unregister(&dict_driver_client); diff -r 882888286bf5 -r 929198b1f313 src/lib-index/Makefile.am --- a/src/lib-index/Makefile.am Mon Jun 09 03:23:34 2008 +0300 +++ b/src/lib-index/Makefile.am Mon Jun 09 03:56:04 2008 +0300 @@ -12,6 +12,7 @@ mail-cache-lookup.c \ mail-cache-transaction.c \ mail-cache-sync-update.c \ + mail-hash.c \ mail-index.c \ mail-index-dummy-view.c \ mail-index-fsck.c \ @@ -37,6 +38,7 @@ headers = \ mail-cache.h \ mail-cache-private.h \ + mail-hash.h \ mail-index.h \ mail-index-modseq.h \ mail-index-private.h \ diff -r 882888286bf5 -r 929198b1f313 src/lib-index/mail-hash.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-index/mail-hash.c Mon Jun 09 03:56:04 2008 +0300 @@ -0,0 +1,1109 @@ +/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "ioloop.h" +#include "array.h" +#include "primes.h" +#include "nfs-workarounds.h" +#include "file-dotlock.h" +#include "file-set-size.h" +#include "read-full.h" +#include "write-full.h" +#include "mmap-util.h" +#include "nfs-workarounds.h" +#include "mail-index-private.h" +#include "mail-hash.h" + +#include +#include +#include +#include + +/* How large to create the file initially */ +#define FILE_SIZE_INIT_PERCENTAGE 120 +/* How much larger to grow the file when it needs to be done */ +#define MAIL_HASH_GROW_PERCENTAGE 20 +/* Minimum hash size to use */ +#define MAIL_HASH_MIN_SIZE 109 + +#define MAIL_HASH_SHRINK_PRESSURE 0.3 +#define MAIL_HASH_GROW_PRESSURE 2 + +#define MAIL_HASH_TIMEOUT_SECS 60 + +struct mail_hash { + struct mail_index *index; + + hash_callback_t *key_hash_cb; + mail_hash_ctx_cmp_callback_t *key_compare_cb; + mail_hash_remap_callback_t *remap_callback; + hash_callback_t *rec_hash_cb; + void *cb_context; + unsigned int transaction_count; + + char *filepath; + char *suffix; + int fd; + unsigned int record_size; + + dev_t dev; + ino_t ino; + + void *mmap_base; + size_t mmap_size; + + time_t mtime, mapped_mtime; + size_t change_offset_start, change_offset_end; + + int lock_type; + struct file_lock *file_lock; + struct dotlock *dotlock; + struct dotlock_settings dotlock_settings; + + const struct mail_hash_header *hdr; + + unsigned int in_memory:1; + unsigned int recreate:1; + unsigned int recreated:1; +}; + +#define HASH_RECORD_IDX(trans, idx) \ + PTR_OFFSET((trans)->records_base, (idx) * (trans)->hdr.record_size) + +struct mail_hash_transaction { + struct mail_hash *hash; + + struct mail_hash_header hdr; + /* hash size is [hdr.hash_size] */ + uint32_t *hash_base; + /* record [0] is always unused */ + void *records_base; + /* number of records in records_base. + base_count + inserts.count == hdr.record_count */ + unsigned int base_count; + + /* bit array of modified data. each bit represents 1024 bytes of the + hash file. used only for data read into memory from hash (not + for mmaped data) */ + ARRAY_TYPE(uint32_t) updates; + /* Records inserted within this transaction */ + ARRAY_TYPE(mail_hash_record) inserts; + unsigned int next_grow_hashed_count; + + uint32_t *hash_buf; + uint32_t records_base_1; /* used as records_base if base_count=1 */ + + unsigned int failed:1; + unsigned int mapped:1; +}; + +struct mail_hash_iterate_context { + struct mail_hash_transaction *trans; + uint32_t next_idx; + unsigned int iter_count; +}; + +const struct dotlock_settings default_dotlock_settings = { + MEMBER(temp_prefix) NULL, + MEMBER(lock_suffix) NULL, + + MEMBER(timeout) 10, + MEMBER(stale_timeout) 30 +}; + +static void mail_hash_set_syscall_error(struct mail_hash *hash, + const char *function) +{ + if (ENOSPACE(errno)) { + hash->index->nodiskspace = TRUE; + return; + } + + mail_index_set_error(hash->index, + "%s failed with index hash file %s: %m", + function, hash->filepath); +} + +void mail_hash_set_corrupted(struct mail_hash *hash, const char *error) +{ + mail_index_set_error(hash->index, "Corrupted index hash file %s: %s", + hash->filepath, error); + if (unlink(hash->filepath) < 0 && errno != ENOENT) + mail_hash_set_syscall_error(hash, "unlink()"); +} + +static inline struct mail_hash_record * +mail_hash_idx(struct mail_hash_transaction *trans, uint32_t idx) +{ + if (idx < trans->base_count) + return HASH_RECORD_IDX(trans, idx); + + i_assert(idx < trans->hdr.record_count); + return array_idx_modifiable(&trans->inserts, idx - trans->base_count); +} + +static void mail_hash_file_close(struct mail_hash *hash) +{ + i_assert(hash->transaction_count == 0); + + if (hash->file_lock != NULL) + file_lock_free(&hash->file_lock); + + if (hash->mmap_base != NULL) { + if (munmap(hash->mmap_base, hash->mmap_size) < 0) + mail_hash_set_syscall_error(hash, "munmap()"); + hash->mmap_base = NULL; + hash->mmap_size = 0; + } + hash->ino = 0; + hash->mapped_mtime = 0; + + if (hash->fd != -1) { + if (close(hash->fd) < 0) + mail_hash_set_syscall_error(hash, "close()"); + hash->fd = -1; + } + + hash->hdr = NULL; + hash->recreate = FALSE; + hash->recreated = FALSE; +} + +struct mail_hash * +mail_hash_alloc(struct mail_index *index, const char *suffix, + unsigned int record_size, + hash_callback_t *key_hash_cb, + hash_callback_t *rec_hash_cb, + mail_hash_ctx_cmp_callback_t *key_compare_cb, + mail_hash_remap_callback_t *remap_callback, + void *context) +{ + struct mail_hash *hash; + + i_assert(record_size >= sizeof(struct mail_hash_record)); + + hash = i_new(struct mail_hash, 1); + hash->index = index; + hash->in_memory = MAIL_INDEX_IS_IN_MEMORY(index) || suffix == NULL; + hash->filepath = hash->in_memory ? i_strdup("(in-memory hash)") : + i_strconcat(index->filepath, suffix, NULL); + hash->suffix = i_strdup(suffix); + hash->record_size = record_size; + hash->fd = -1; + hash->lock_type = F_UNLCK; + hash->dotlock_settings = default_dotlock_settings; + hash->dotlock_settings.use_excl_lock = index->use_excl_dotlocks; + hash->dotlock_settings.nfs_flush = index->nfs_flush; + + hash->key_hash_cb = key_hash_cb; + hash->rec_hash_cb = rec_hash_cb; + hash->key_compare_cb = key_compare_cb; + hash->remap_callback = remap_callback, + hash->cb_context = context; + return hash; +} + +void mail_hash_free(struct mail_hash **_hash) +{ + struct mail_hash *hash = *_hash; + + *_hash = NULL; + + mail_hash_file_close(hash); + i_free(hash->filepath); + i_free(hash->suffix); + i_free(hash); +} + +static void +mail_hash_header_init(struct mail_hash *hash, unsigned int hash_size, + struct mail_hash_header *hdr) +{ + memset(hdr, 0, sizeof(*hdr)); + hdr->version = MAIL_HASH_VERSION; + hdr->base_header_size = sizeof(*hdr); + hdr->header_size = hdr->base_header_size; + hdr->record_size = hash->record_size; + /* note that since the index may not have been synced yet, the + uid_validity may be 0 */ + hdr->uid_validity = hash->index->map->hdr.uid_validity; + + hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE); + hdr->record_count = 1; /* [0] always exists */ +} + +static bool mail_hash_check_header(struct mail_hash *hash, + const struct mail_hash_header *hdr) +{ + uoff_t file_size; + + if (hdr->version != MAIL_HASH_VERSION || + (hdr->last_uid != 0 && + hdr->uid_validity != hash->index->map->hdr.uid_validity) || + (hdr->corrupted && hash->change_offset_end == 0)) { + /* silent rebuild */ + if (unlink(hash->filepath) < 0 && errno != ENOENT) + mail_hash_set_syscall_error(hash, "unlink()"); + return FALSE; + } + + if (hdr->record_size != hash->record_size) { + mail_hash_set_corrupted(hash, "record_size mismatch"); + return FALSE; + } + if (hdr->base_header_size != sizeof(*hdr)) { + mail_hash_set_corrupted(hash, "base_header_size mismatch"); + return FALSE; + } + if (hdr->header_size < hdr->base_header_size) { + mail_hash_set_corrupted(hash, "Invalid header_size"); + return FALSE; + } + if (hdr->record_count == 0) { + mail_hash_set_corrupted(hash, "Invalid record_count"); + return FALSE; + } + if (hdr->hashed_count > hdr->record_count) { + mail_hash_set_corrupted(hash, "Invalid hashed_count"); + return FALSE; + } + if (hdr->message_count > hdr->record_count - 1) { + mail_hash_set_corrupted(hash, "Invalid message_count"); + return FALSE; + } + if (hdr->last_uid < hdr->message_count) { + mail_hash_set_corrupted(hash, "Invalid last_uid"); + return FALSE; + } + if (hdr->uid_validity == 0 && hdr->message_count > 0) { + mail_hash_set_corrupted(hash, "Zero uidvalidity"); + return FALSE; + } + + if (hdr->hash_size < primes_closest(1)) { + mail_hash_set_corrupted(hash, "Invalid hash_size"); + return FALSE; + } + + file_size = hdr->header_size + + hdr->hash_size * sizeof(uint32_t) + + hdr->record_count * hdr->record_size; + if (hash->mmap_size < file_size) { + mail_hash_set_corrupted(hash, "File too small"); + return FALSE; + } + return TRUE; +} + +static int mail_hash_file_fstat(struct mail_hash *hash, struct stat *st_r) +{ + if (fstat(hash->fd, st_r) < 0) { + mail_hash_set_syscall_error(hash, "fstat()"); + return -1; + } + hash->dev = st_r->st_dev; + hash->ino = st_r->st_ino; + return 0; +} + +static int mail_hash_file_map(struct mail_hash *hash, bool full) +{ + struct stat st; + + i_assert(hash->transaction_count == 0); + i_assert(hash->lock_type != F_UNLCK); + + if (mail_hash_file_fstat(hash, &st) < 0) + return -1; + + if (st.st_size < (off_t)sizeof(*hash->hdr)) { + mail_hash_set_corrupted(hash, "File too small"); + return 0; + } + + if (!hash->index->mmap_disable) { + if (hash->mmap_base != NULL) { + if (munmap(hash->mmap_base, hash->mmap_size) < 0) + mail_hash_set_syscall_error(hash, "munmap()"); + } + hash->mmap_size = st.st_size; + hash->mmap_base = mmap(NULL, hash->mmap_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, hash->fd, 0); + if (hash->mmap_base == MAP_FAILED) { + hash->mmap_size = 0; + hash->mmap_base = NULL; + mail_hash_set_syscall_error(hash, "mmap()"); + return -1; + } + hash->mapped_mtime = st.st_mtime; + } else { + //FIXME + } + hash->mtime = st.st_mtime; + hash->hdr = hash->mmap_base; + return 1; +} + +static int mail_hash_file_map_header(struct mail_hash *hash) +{ + int ret; + + if (hash->fd == -1) { + hash->hdr = NULL; + return 1; + } + + if ((ret = mail_hash_file_map(hash, FALSE)) <= 0) + return ret; + + return mail_hash_check_header(hash, hash->hdr) ? 1 : 0; +} + +static int mail_hash_open_fd(struct mail_hash *hash) +{ + hash->fd = nfs_safe_open(hash->filepath, O_RDWR); + if (hash->fd == -1) { + if (errno == ENOENT) + return 0; + mail_hash_set_syscall_error(hash, "open()"); + return -1; + } + return 1; +} + +static int mail_hash_reopen_if_needed(struct mail_hash *hash) +{ + struct stat st; + + if (hash->fd == -1) + return mail_hash_open_fd(hash); + + if (hash->index->nfs_flush) + nfs_flush_file_handle_cache(hash->filepath); + + if (hash->ino == 0) { + if (mail_hash_file_fstat(hash, &st) < 0) + return -1; + } + + if (nfs_safe_stat(hash->filepath, &st) < 0) { + if (errno != ENOENT) { + mail_hash_set_syscall_error(hash, "stat()"); + return -1; + } + } else if (st.st_ino == hash->ino && CMP_DEV_T(st.st_dev, hash->dev)) { + /* the file looks the same */ + if (!hash->index->nfs_flush || fstat(hash->fd, &st) == 0) { + /* it is the same */ + return 0; + } + if (errno != ESTALE) { + mail_hash_set_syscall_error(hash, "fstat()"); + return -1; + } + /* ESTALE - another file renamed over */ + } + mail_hash_file_close(hash); + return mail_hash_open_fd(hash); +} + +static int +mail_hash_file_lock(struct mail_hash *hash, int lock_type, bool try_lock) +{ + enum dotlock_create_flags flags; + unsigned int timeout; + int ret; + + i_assert(hash->fd != -1); + + if (hash->index->lock_method != FILE_LOCK_METHOD_DOTLOCK) { + i_assert(hash->file_lock == NULL); + + timeout = !try_lock ? MAIL_HASH_TIMEOUT_SECS : 0; + ret = file_wait_lock(hash->fd, hash->filepath, lock_type, + hash->index->lock_method, + timeout, &hash->file_lock); + if (ret < 0 || (ret == 0 && !try_lock)) { + mail_hash_set_syscall_error(hash, + "file_wait_lock()"); + } + } else { + i_assert(hash->dotlock == NULL); + + flags = try_lock ? DOTLOCK_CREATE_FLAG_NONBLOCK : 0; + ret = file_dotlock_create(&hash->dotlock_settings, + hash->filepath, flags, + &hash->dotlock); + if (ret < 0 || (ret == 0 && !try_lock)) { + mail_hash_set_syscall_error(hash, + "file_dotlock_create()"); + } + } + return ret; +} + +int mail_hash_lock_shared(struct mail_hash *hash) +{ + int ret; + + i_assert(hash->lock_type == F_UNLCK); + + if (hash->in_memory) { + if (hash->hdr == NULL) + return 0; + hash->lock_type = F_RDLCK; + return 1; + } + + if (hash->fd == -1) { + if ((ret = mail_hash_open_fd(hash)) <= 0) + return ret; + } + + do { + if ((ret = mail_hash_file_lock(hash, F_RDLCK, FALSE)) <= 0) + return -1; + } while ((ret = mail_hash_reopen_if_needed(hash)) > 0); + if (ret < 0 || hash->fd == -1) + return ret; + + hash->lock_type = F_RDLCK; + mail_index_flush_read_cache(hash->index, hash->filepath, + hash->fd, TRUE); + if ((ret = mail_hash_file_map_header(hash)) <= 0) { + mail_hash_unlock(hash); + return ret; + } + return 1; +} + +static int +mail_hash_lock_exclusive_fd(struct mail_hash *hash, + enum mail_hash_lock_flags flags, bool *created_r) +{ + bool exists = TRUE; + int ret; + + i_assert(hash->file_lock == NULL); + i_assert(hash->dotlock == NULL); + + *created_r = FALSE; + + if (hash->index->lock_method == FILE_LOCK_METHOD_DOTLOCK) { + /* use dotlocking */ + } else if (hash->fd == -1 && (ret = mail_hash_open_fd(hash)) <= 0) { + if (ret < 0 || + (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) == 0) + return ret; + /* the file doesn't exist - we need to use dotlocking */ + exists = FALSE; + } else { + /* first try to lock the file descriptor */ + ret = mail_hash_file_lock(hash, F_WRLCK, TRUE); + if (ret != 0) { + /* success / error */ + return ret; + } + if ((flags & MAIL_HASH_LOCK_FLAG_TRY) != 0) + return 0; + + /* already locked. if it's only read-locked, we can + overwrite the file. first wait for a shared lock. */ + if (mail_hash_lock_shared(hash) <= 0) + return -1; + /* try once again if we can upgrade our shared lock to + an exclusive lock */ + ret = file_lock_try_update(hash->file_lock, F_WRLCK); + if (ret != 0) + return ret; + /* still no luck - fallback to dotlocking */ + } + if (file_dotlock_create(&hash->dotlock_settings, hash->filepath, + 0, &hash->dotlock) <= 0) { + mail_hash_set_syscall_error(hash, "file_dotlock_create()"); + return -1; + } + if (!exists) { + /* file didn't exist - see if someone just created it */ + i_assert(hash->fd == -1); + ret = mail_hash_open_fd(hash); + if (ret != 0) { + (void)file_dotlock_delete(&hash->dotlock); + if (ret < 0) + return -1; + + /* the file was created - we need to have it locked, + so retry this operation */ + return mail_hash_lock_exclusive_fd(hash, flags, + created_r); + } + *created_r = TRUE; + } + /* other sessions are reading the file, we must not overwrite */ + hash->recreate = TRUE; + return 1; +} + +static int mail_hash_lock_excl_full(struct mail_hash *hash, + enum mail_hash_lock_flags flags, + bool *created_r) +{ + bool create_missing = (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) != 0; + int ret; + + i_assert(hash->lock_type == F_UNLCK); + + if (hash->in_memory) { + if (hash->hdr == NULL && !create_missing) + return 0; + hash->lock_type = F_WRLCK; + return 1; + } + + if ((ret = mail_hash_lock_exclusive_fd(hash, flags, created_r)) <= 0) { + mail_hash_unlock(hash); + return ret; + } + hash->lock_type = F_WRLCK; + + mail_index_flush_read_cache(hash->index, hash->filepath, + hash->fd, TRUE); + if ((ret = mail_hash_file_map_header(hash)) <= 0) { + mail_hash_unlock(hash); + if (ret == 0 && create_missing) { + /* the broken file was unlinked - try again */ + mail_hash_file_close(hash); + return mail_hash_lock_excl_full(hash, flags, created_r); + } + return ret; + } + return 1; +} + +int mail_hash_lock_exclusive(struct mail_hash *hash, + enum mail_hash_lock_flags flags) +{ + bool created; + + return mail_hash_lock_excl_full(hash, flags, &created); +} + +void mail_hash_unlock(struct mail_hash *hash) +{ + if (hash->recreated) + mail_hash_file_close(hash); + + if (hash->file_lock != NULL) + file_unlock(&hash->file_lock); + if (hash->dotlock != NULL) + (void)file_dotlock_delete(&hash->dotlock); + hash->lock_type = F_UNLCK; +} + +int mail_hash_create_excl_locked(struct mail_hash *hash) +{ + bool created; + + if (mail_hash_lock_excl_full(hash, MAIL_HASH_LOCK_FLAG_CREATE_MISSING, + &created) <= 0) + return -1; + + if (!created) { + mail_hash_unlock(hash); + return 0; + } + return 1; +} + +static void mail_hash_resize(struct mail_hash_transaction *trans) +{ + struct mail_hash_record *rec; + unsigned int idx, new_size, hash_idx, hash_key; + + new_size = I_MAX(primes_closest(trans->hdr.hashed_count), + MAIL_HASH_MIN_SIZE); + i_assert(new_size != trans->hdr.hash_size); + trans->hdr.hash_size = new_size; + + i_free(trans->hash_buf); + trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size); + trans->hash_base = trans->hash_buf; + + for (idx = 1; idx < trans->hdr.record_count; idx++) { + rec = mail_hash_idx(trans, idx); + if (MAIL_HASH_RECORD_IS_DELETED(rec)) + continue; + + hash_key = trans->hash->rec_hash_cb(rec); + if (hash_key == 0) + continue; + + /* found a hashed record, move it to its new position */ + hash_idx = hash_key % trans->hdr.hash_size; + rec->next_idx = trans->hash_buf[hash_idx]; + trans->hash_buf[hash_idx] = idx; + } + + trans->next_grow_hashed_count = + trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE; + i_assert(trans->hdr.hashed_count < trans->next_grow_hashed_count); +} + +struct mail_hash_transaction * +mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size) +{ + struct mail_hash_transaction *trans; + + i_assert(hash->lock_type != F_UNLCK); + + trans = i_new(struct mail_hash_transaction, 1); + trans->hash = hash; + if (hash->hdr != NULL) + trans->hdr = *hash->hdr; + else { + mail_hash_header_init(hash, min_hash_size, &trans->hdr); + trans->mapped = TRUE; + } + trans->base_count = trans->hdr.record_count; + if (trans->base_count <= 1) { + /* empty hash */ + trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size); + trans->hash_base = trans->hash_buf; + trans->records_base = &trans->records_base_1; + } else { + trans->hash_base = + PTR_OFFSET(hash->mmap_base, hash->hdr->header_size); + trans->records_base = &trans->hash_base[hash->hdr->hash_size]; + } + + trans->next_grow_hashed_count = + trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE; + hash->transaction_count++; + return trans; +} + +int mail_hash_transaction_write(struct mail_hash_transaction *trans) +{ + struct mail_hash *hash = trans->hash; + const struct mail_hash_record *inserts; + unsigned int size, count, existing_size; + const char *temp_path = NULL; + uoff_t offset; + float nodes_per_list; + int fd = hash->fd; + + i_assert(hash->lock_type == F_WRLCK); + + if (trans->failed) + return -1; + if (!array_is_created(&trans->inserts) && + !array_is_created(&trans->updates)) { + /* nothing changed */ + return 0; + } + + /* see if hash needs resizing */ + nodes_per_list = (float)trans->hdr.hashed_count / + (float)trans->hdr.hash_size; + if ((nodes_per_list < MAIL_HASH_SHRINK_PRESSURE && + trans->hdr.hash_size > MAIL_HASH_MIN_SIZE) || + nodes_per_list > MAIL_HASH_GROW_PRESSURE) + mail_hash_resize(trans); + + if (trans->hash->in_memory) + return 0; + + if (hash->recreate || hash->hdr->hash_size != trans->hdr.hash_size) { + /* recreate the file instead of overwriting */ + fd = -1; + } + + existing_size = sizeof(trans->hdr) + + trans->hdr.hash_size * sizeof(uint32_t) + + trans->base_count * trans->hdr.record_size; + if (fd == -1) { + temp_path = t_strconcat(hash->filepath, ".tmp", NULL); + fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd == -1) { + if (ENOSPACE(errno)) { + hash->index->nodiskspace = TRUE; + return -1; + } + mail_index_set_error(hash->index, + "creat(%s) failed: %m", temp_path); + return -1; + } + } + + if (pwrite_full(fd, &trans->hdr, sizeof(trans->hdr), 0) < 0) { + mail_hash_set_syscall_error(hash, "pwrite()"); + return -1; + } + /* FIXME: use updates array */ + offset = sizeof(trans->hdr); + if (pwrite_full(fd, trans->hash_base, + trans->hdr.hash_size * sizeof(uint32_t), offset) < 0) { + mail_hash_set_syscall_error(hash, "pwrite()"); + return -1; + } + offset += trans->hdr.hash_size * sizeof(uint32_t); + /* if there's only the first null record, don't bother writing it. + especially because then records_base may point to sizeof(uint32_t) + instead of hdr.record_size */ + if (trans->base_count > 1) { + if (pwrite_full(fd, trans->records_base, + trans->base_count * trans->hdr.record_size, + offset) < 0) { + mail_hash_set_syscall_error(hash, "pwrite()"); + return -1; + } + } + + /* write the new data */ + if (array_is_created(&trans->inserts)) { + inserts = array_get(&trans->inserts, &count); + size = count * trans->hdr.record_size; + if (pwrite_full(fd, inserts, size, existing_size) < 0) { + mail_hash_set_syscall_error(hash, "pwrite()"); + return -1; + } + } + if (temp_path != NULL) { + if (rename(temp_path, hash->filepath) < 0) { + mail_index_set_error(hash->index, + "rename(%s, %s) failed: %m", + temp_path, hash->filepath); + return -1; + } + if (close(fd) < 0) + mail_hash_set_syscall_error(hash, "close()"); + /* we must not overwrite before reopening the file */ + hash->recreate = TRUE; + hash->recreated = TRUE; + } + return 0; +} + +void mail_hash_transaction_end(struct mail_hash_transaction **_trans) +{ + struct mail_hash_transaction *trans = *_trans; + + *_trans = NULL; + + trans->hash->transaction_count--; + if (array_is_created(&trans->inserts)) + array_free(&trans->inserts); + if (array_is_created(&trans->updates)) + array_free(&trans->updates); + i_free(trans->hash_buf); + i_free(trans); +} + +bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans) +{ + return trans->failed; +} + +void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans, + const char *error) +{ + trans->failed = TRUE; + mail_hash_set_corrupted(trans->hash, error); +} + +void mail_hash_reset(struct mail_hash_transaction *trans) +{ + mail_hash_header_init(trans->hash, trans->hdr.hash_size / 2, + &trans->hdr); + trans->mapped = TRUE; + trans->base_count = trans->hdr.record_count; + i_assert(trans->base_count == 1); + + i_free(trans->hash_buf); + trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size); + trans->hash_base = trans->hash_buf; + trans->records_base = &trans->records_base_1; +} + +int mail_hash_map_file(struct mail_hash_transaction *trans) +{ + if (!trans->mapped) { + if (mail_hash_file_map(trans->hash, TRUE) <= 0) { + trans->failed = TRUE; + return -1; + } + trans->mapped = TRUE; + } + return 0; +} + +struct mail_hash_header * +mail_hash_get_header(struct mail_hash_transaction *trans) +{ + return &trans->hdr; +} + +static void mail_hash_iterate_init(struct mail_hash_iterate_context *iter, + struct mail_hash_transaction *trans, + uint32_t start_idx) +{ + memset(iter, 0, sizeof(*iter)); + iter->trans = trans; + iter->next_idx = start_idx; +} + +static int mail_hash_iterate_next(struct mail_hash_iterate_context *iter, + uint32_t *idx_r) +{ + struct mail_hash_record *rec; + uint32_t idx = iter->next_idx; + + if (idx == 0) + return 0; + if (unlikely(idx >= iter->trans->hdr.record_count)) { + mail_hash_transaction_set_corrupted(iter->trans, + "Index points outside file"); + return -1; + } + rec = mail_hash_idx(iter->trans, idx); + iter->next_idx = rec->next_idx; + + if (++iter->iter_count > iter->trans->hdr.record_count) { + /* we've iterated through more indexes than there exist. + we must be looping. */ + mail_hash_transaction_set_corrupted(iter->trans, + "next_iter loops"); + return -1; + } + *idx_r = idx; + return 1; +} + +void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key, + uint32_t *idx_r) +{ + struct mail_hash *hash = trans->hash; + struct mail_hash_iterate_context iter; + unsigned int hash_idx; + uint32_t idx; + int ret; + + hash_idx = hash->key_hash_cb(key) % trans->hdr.hash_size; + mail_hash_iterate_init(&iter, trans, trans->hash_base[hash_idx]); + + while ((ret = mail_hash_iterate_next(&iter, &idx)) > 0) { + if (hash->key_compare_cb(trans, key, idx, hash->cb_context)) + break; + } + + if (ret <= 0) { + *idx_r = 0; + return NULL; + } else { + *idx_r = idx; + return mail_hash_idx(trans, idx); + } +} + +void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx) +{ + i_assert(idx > 0); + + if (idx >= trans->hdr.record_count) { + mail_hash_transaction_set_corrupted(trans, + "Index points outside file"); + /* return pointer to the first dummy record */ + idx = 0; + } + + return mail_hash_idx(trans, idx); +} + +static uoff_t +mail_hash_idx_to_offset(struct mail_hash_transaction *trans, uint32_t idx) +{ + return trans->hdr.header_size + + trans->hdr.hash_size * sizeof(uint32_t) + + trans->hdr.record_size * idx; +} + +static void +mail_hash_update_offset(struct mail_hash_transaction *trans, + uoff_t offset, unsigned int size) +{ + uint32_t *p; + unsigned int pos = offset / 1024; + unsigned int pos2 = (offset + size - 1) / 1024; + + if (!array_is_created(&trans->updates)) + i_array_init(&trans->updates, I_MAX(pos, 256)); + + while (pos <= pos2) { + p = array_idx_modifiable(&trans->updates, pos / 32); + *p |= 1 << (pos % 32); + pos++; + } +} + +static void mail_hash_update_hash_idx(struct mail_hash_transaction *trans, + uint32_t hash_idx) +{ + size_t offset; + + offset = trans->hdr.header_size + hash_idx * sizeof(uint32_t); + mail_hash_update_offset(trans, offset, sizeof(uint32_t)); +} + +static void mail_hash_insert_idx(struct mail_hash_transaction *trans, + const void *value, uint32_t *idx_r) +{ + struct mail_hash_record *rec; + uint32_t idx; + + if (trans->hdr.first_hole_idx != 0) { + /* allocate the record from the first hole */ + idx = trans->hdr.first_hole_idx; + rec = mail_hash_idx(trans, idx); + + memcpy(&trans->hdr.first_hole_idx, rec + 1, + sizeof(trans->hdr.first_hole_idx)); + mail_hash_update(trans, idx); + } else { + idx = trans->hdr.record_count++; + if (!array_is_created(&trans->inserts)) { + array_create(&trans->inserts, default_pool, + trans->hdr.record_size, 128); + } + rec = array_append_space(&trans->inserts); + } + + memcpy(rec, value, trans->hdr.record_size); + rec->next_idx = 0; + + *idx_r = idx; +} + +static void mail_hash_insert_hash(struct mail_hash_transaction *trans, + uint32_t hash_key, uint32_t idx) +{ + struct mail_hash_record *rec; + uint32_t hash_idx; + + if (trans->hdr.hashed_count >= trans->next_grow_hashed_count) + mail_hash_resize(trans); + + hash_idx = hash_key % trans->hdr.hash_size; + + rec = mail_hash_idx(trans, idx); + rec->next_idx = trans->hash_base[hash_idx]; + trans->hash_base[hash_idx] = idx; + + mail_hash_update_hash_idx(trans, hash_idx); + trans->hdr.hashed_count++; +} + +void mail_hash_insert(struct mail_hash_transaction *trans, const void *key, + const void *value, uint32_t *idx_r) +{ + uint32_t hash_key; + + mail_hash_insert_idx(trans, value, idx_r); + if (key == NULL) + hash_key = 0; + else { + hash_key = trans->hash->key_hash_cb(key); + mail_hash_insert_hash(trans, hash_key, *idx_r); + } + i_assert(trans->hash->rec_hash_cb(value) == hash_key); +} + +void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx) +{ + uoff_t offset; + + i_assert(idx > 0 && idx < trans->hdr.record_count); + + offset = mail_hash_idx_to_offset(trans, idx); + mail_hash_update_offset(trans, offset, + trans->hdr.record_size); +} + +static void +mail_hash_remove_idx(struct mail_hash_transaction *trans, uint32_t idx) +{ + struct mail_hash_record *rec; + + if (idx+1 == trans->hdr.record_count) { + /* removing last record */ + trans->hdr.record_count--; + } else { + /* mark the record expunged */ + rec = mail_hash_idx(trans, idx); + rec->next_idx = (uint32_t)-1; + /* update the linked list of holes */ + i_assert(trans->hdr.record_size >= + sizeof(*rec) + sizeof(trans->hdr.first_hole_idx)); + memcpy(rec+1, &trans->hdr.first_hole_idx, + sizeof(trans->hdr.first_hole_idx)); + trans->hdr.first_hole_idx = idx; + + mail_hash_update(trans, idx); + } +} + +void mail_hash_remove(struct mail_hash_transaction *trans, + uint32_t idx, uint32_t key_hash) +{ + struct mail_hash_record *rec, *rec2; + unsigned int hash_idx; + uint32_t idx2; + int ret; + + i_assert(idx > 0 && idx < trans->hdr.record_count); + + if (key_hash == 0) { + /* key not in hash table */ + mail_hash_remove_idx(trans, idx); + return; + } + + rec = mail_hash_idx(trans, idx); + + hash_idx = key_hash % trans->hdr.hash_size; + idx2 = trans->hash_base[hash_idx]; + if (idx2 == idx) { + /* first in the hash table */ + trans->hash_base[hash_idx] = rec->next_idx; + mail_hash_update_hash_idx(trans, hash_idx); + } else { + /* find the previous record */ + struct mail_hash_iterate_context iter; + + mail_hash_iterate_init(&iter, trans, idx2); + while ((ret = mail_hash_iterate_next(&iter, &idx2)) > 0) { + rec2 = mail_hash_idx(trans, idx2); + if (rec2->next_idx == idx) + break; + } + if (ret <= 0) { + if (ret == 0) { + mail_hash_set_corrupted(trans->hash, + "Tried to remove non-existing key"); + } + return; + } + + rec2->next_idx = rec->next_idx; + mail_hash_update_offset(trans, idx2, trans->hdr.record_size); + + if (trans->hdr.hashed_count == 0) { + mail_hash_set_corrupted(trans->hash, + "Too low hashed_count"); + return; + } + trans->hdr.hashed_count--; + } + + mail_hash_remove_idx(trans, idx); +} diff -r 882888286bf5 -r 929198b1f313 src/lib-index/mail-hash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-index/mail-hash.h Mon Jun 09 03:56:04 2008 +0300 @@ -0,0 +1,141 @@ +#ifndef MAIL_HASH_H +#define MAIL_HASH_H + +#include "hash.h" + +#define MAIL_HASH_VERSION 1 + +struct mail_index; +struct mail_hash; +struct mail_hash_transaction; + +/* File format: + + [header] + [hash_size * (uint32_t idx)] + [record_count * hdr.record_size] + + The indexes start from 1, so 0 means "doesn't exist". +*/ + +struct mail_hash_header { + /* File format version */ + uint8_t version; + /* Corrupted flag is set while file is being modified. */ + uint8_t corrupted; + uint8_t unused[3]; + + uint16_t base_header_size; + /* Full size of each mail_hash_record */ + uint16_t record_size; + /* Full header size. Currently always the same as base_header_size. */ + uint32_t header_size; + /* Number of records after the hash table, includes [0] and holes */ + uint32_t record_count; + /* Number of messages with non-zero hash */ + uint32_t hashed_count; + + /* Holes is a linked list of unused records. 0 = no holes. */ + uint32_t first_hole_idx; + /* Hash size as number of elements */ + uint32_t hash_size; + + /* UID validity. */ + uint32_t uid_validity; + /* The last UID which has been added to this file (but may have been + expunged already) */ + uint32_t last_uid; + /* Number of message records (records with non-zero UID) */ + uint32_t message_count; +}; + +struct mail_hash_record { + /* Linked list of records for hash collisions. + (uint32_t)-1 means this record is deleted */ + uint32_t next_idx; + /* user_data[] */ +}; +ARRAY_DEFINE_TYPE(mail_hash_record, struct mail_hash_record); +#define MAIL_HASH_RECORD_IS_DELETED(rec) \ + ((rec)->next_idx == (uint32_t)-1) + +enum mail_hash_lock_flags { + MAIL_HASH_LOCK_FLAG_TRY = 0x01, + MAIL_HASH_LOCK_FLAG_CREATE_MISSING = 0x02 +}; + +/* Returns 0 if the pointers are equal. */ +typedef bool mail_hash_ctx_cmp_callback_t(struct mail_hash_transaction *trans, + const void *key, uint32_t idx, + void *context); +/* map[] contains old -> new index mapping. */ +typedef int mail_hash_remap_callback_t(struct mail_hash_transaction *trans, + const uint32_t *map, + unsigned int map_size, void *context); + +/* suffix=NULL keeps the has only in memory */ +struct mail_hash * +mail_hash_alloc(struct mail_index *index, const char *suffix, + unsigned int record_size, + hash_callback_t *key_hash_cb, + hash_callback_t *rec_hash_cb, + mail_hash_ctx_cmp_callback_t *key_compare_cb, + mail_hash_remap_callback_t *remap_callback, + void *context); +void mail_hash_free(struct mail_hash **hash); + +/* Returns 1 if created and locked, 0 if already exists, -1 if error. */ +int mail_hash_create_excl_locked(struct mail_hash *hash); + +/* Lock the file. Returns 1 if locking was successful, 0 if file doesn't exist, + -1 if error. */ +int mail_hash_lock_shared(struct mail_hash *hash); +/* If FLAG_TRY_LOCK is set and file is already locked, return 0. + Otherwise return values are identical with mail_hash_lock_shared() */ +int mail_hash_lock_exclusive(struct mail_hash *hash, + enum mail_hash_lock_flags flags); +void mail_hash_unlock(struct mail_hash *hash); + +struct mail_hash_transaction * +mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size); +int mail_hash_transaction_write(struct mail_hash_transaction *trans); +void mail_hash_transaction_end(struct mail_hash_transaction **trans); +/* Returns TRUE if transaction is in broken state because of an earlier + I/O error or detected file corruption. */ +bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans); + +/* Clear the entire hash file's contents. */ +void mail_hash_reset(struct mail_hash_transaction *trans); +/* Read the entire file to memory. */ +int mail_hash_map_file(struct mail_hash_transaction *trans); + +/* Returns a modifiable hash header. */ +struct mail_hash_header * +mail_hash_get_header(struct mail_hash_transaction *trans); + +/* Look up key from hash and return its value or NULL if key isn't in the hash. + NULL is also returned if the lookup fails because of I/O error or file + corruption. */ +void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key, + uint32_t *idx_r); +/* Never returns NULL. If the lookup fails the transaction is marked broken + and a pointer to a dummy record is returned. */ +void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx); + +/* If key=NULL, it's only added as a record after the hash table and not + added to the actual hash table. Note that hash=0 equals to key=NULL insert, + so a valid hash value must never be 0. */ +void mail_hash_insert(struct mail_hash_transaction *trans, const void *key, + const void *value, uint32_t *idx_r); +/* Mark the record at given index as modified. */ +void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx); +/* idx must be provided. key_hash must be provided if the record was added + with non-NULL key. */ +void mail_hash_remove(struct mail_hash_transaction *trans, + uint32_t idx, uint32_t key_hash); + +void mail_hash_set_corrupted(struct mail_hash *hash, const char *error); +void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans, + const char *error); + +#endif diff -r 882888286bf5 -r 929198b1f313 src/lib-index/mail-index-sync-ext.c diff -r 882888286bf5 -r 929198b1f313 src/lib-mail/message-parser.c diff -r 882888286bf5 -r 929198b1f313 src/lib/istream-crlf.c