Mercurial > dovecot > original-hg > dovecot-1.2
changeset 7952:04a3be30e5a6 HEAD
Thread index fixes and optimizations.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Thu, 26 Jun 2008 19:11:56 +0300 |
parents | 5e3b995372d4 |
children | 01c23befce4f |
files | doc/thread-refs.txt src/lib-index/mail-hash.c src/lib-index/mail-hash.h src/lib-storage/index/Makefile.am src/lib-storage/index/index-thread-finish.c src/lib-storage/index/index-thread-links.c src/lib-storage/index/index-thread-list.c src/lib-storage/index/index-thread-private.h src/lib-storage/index/index-thread.c |
diffstat | 9 files changed, 745 insertions(+), 193 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/thread-refs.txt Thu Jun 26 08:49:02 2008 +0300 +++ b/doc/thread-refs.txt Thu Jun 26 19:11:56 2008 +0300 @@ -62,17 +62,18 @@ parent: Pointer to parent node. Children pointers aren't required. uid: Message's UID (0 = expunged or never even existed) - link_refcount: Number of messages referencing "this node" -> "parent node" - (e.g. "References: a b" would increase this message's reference as well as - b's reference, regardless of how the links are really added to the thread - tree). + parent_link_refcount: Number of messages containing "this message" -> "parent + message" link, i.e. "number of links to parent node". However since parents + can change, not all of these references might be from our current child + nodes. When this refcount reaches 0, it means we must detach from our + parent. expunge_rebuilds: If this message gets expunged, rebuild the thread tree. parent_unref_rebuilds: If a link between this node and its child gets unreferenced, rebuild the thread tree. } link_reference(parent_node, child_node) - parent_node.link_refcount++ + child_node.parent_link_refcount++ if is_ancestor(child_node, parent_node) // child_node is an ancestor of parent_node. Adding child_node -> // parent_node would introduce a loop. If any messages referencing the @@ -144,8 +145,8 @@ if parent.parent_unref_rebuilds return false - child.link_refcount-- - if child.link_refcount == 0 + child.parent_link_refcount-- + if child.parent_link_refcount == 0 child.parent = NIL return true
--- a/src/lib-index/mail-hash.c Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-index/mail-hash.c Thu Jun 26 19:11:56 2008 +0300 @@ -232,6 +232,8 @@ hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE); hdr->record_count = 1; /* [0] always exists */ + hdr->reset_counter = hash->hdr == NULL ? ioloop_time : + hash->hdr->reset_counter + 1; } static bool mail_hash_check_header(struct mail_hash *hash, @@ -603,6 +605,11 @@ hash->lock_type = F_UNLCK; } +int mail_hash_get_lock_type(struct mail_hash *hash) +{ + return hash->lock_type; +} + int mail_hash_create_excl_locked(struct mail_hash *hash) { bool created; @@ -696,6 +703,7 @@ int fd = hash->fd; i_assert(hash->lock_type == F_WRLCK); + i_assert(trans->hdr.hashed_count <= trans->hdr.record_count); if (trans->failed) return -1; @@ -726,7 +734,8 @@ trans->base_count * trans->hdr.record_size; if (fd == -1) { temp_path = t_strconcat(hash->filepath, ".tmp", NULL); - fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC, 0600); + fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC, + hash->index->mode); if (fd == -1) { if (ENOSPACE(errno)) { hash->index->nodiskspace = TRUE; @@ -807,6 +816,17 @@ return trans->failed; } +bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans) +{ + return trans->hash->in_memory; +} + +struct mail_hash * +mail_hash_transaction_get_hash(struct mail_hash_transaction *trans) +{ + return trans->hash; +} + void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans, const char *error) { @@ -966,6 +986,11 @@ if (trans->hdr.first_hole_idx != 0) { /* allocate the record from the first hole */ idx = trans->hdr.first_hole_idx; + if (idx >= trans->hdr.record_count) { + mail_hash_transaction_set_corrupted(trans, + "Corrupted first_hole_idx"); + idx = 0; + } rec = mail_hash_idx(trans, idx); memcpy(&trans->hdr.first_hole_idx, rec + 1, @@ -1104,8 +1129,8 @@ "Too low hashed_count"); return; } - trans->hdr.hashed_count--; } + trans->hdr.hashed_count--; mail_hash_remove_idx(trans, idx); }
--- a/src/lib-index/mail-hash.h Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-index/mail-hash.h Thu Jun 26 19:11:56 2008 +0300 @@ -47,6 +47,8 @@ uint32_t last_uid; /* Number of message records (records with non-zero UID) */ uint32_t message_count; + /* Increased every time the hash is reset */ + uint32_t reset_counter; }; struct mail_hash_record { @@ -95,6 +97,8 @@ int mail_hash_lock_exclusive(struct mail_hash *hash, enum mail_hash_lock_flags flags); void mail_hash_unlock(struct mail_hash *hash); +/* Returns the current locking state (F_UNLCK, F_RDLCK, F_WRLCK) */ +int mail_hash_get_lock_type(struct mail_hash *hash); struct mail_hash_transaction * mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size); @@ -103,6 +107,12 @@ /* Returns TRUE if transaction is in broken state because of an earlier I/O error or detected file corruption. */ bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans); +/* Returns TRUE if hash is currently being updated in memory. */ +bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans); + +/* Returns the hash structure of the transaction. */ +struct mail_hash * +mail_hash_transaction_get_hash(struct mail_hash_transaction *trans); /* Clear the entire hash file's contents. */ void mail_hash_reset(struct mail_hash_transaction *trans);
--- a/src/lib-storage/index/Makefile.am Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-storage/index/Makefile.am Thu Jun 26 19:11:56 2008 +0300 @@ -26,6 +26,7 @@ index-thread.c \ index-thread-finish.c \ index-thread-links.c \ + index-thread-list.c \ index-transaction.c headers = \
--- a/src/lib-storage/index/index-thread-finish.c Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-storage/index/index-thread-finish.c Thu Jun 26 19:11:56 2008 +0300 @@ -37,6 +37,7 @@ struct mail *tmp_mail; struct mail_hash_transaction *hash_trans; + struct mail_thread_list_context *hash_list_ctx; ARRAY_DEFINE(roots, struct mail_thread_root_node); ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node); @@ -127,16 +128,24 @@ return 0; } +static uint32_t +thread_lookup_existing(struct thread_finish_context *ctx, uint32_t idx) +{ + const struct mail_thread_node *node; + + node = mail_hash_lookup_idx(ctx->hash_trans, idx); + i_assert(MAIL_INDEX_NODE_EXISTS(node)); + i_assert(node->uid_or_id != 0); + return node->uid_or_id; +} + static int thread_child_node_fill(struct thread_finish_context *ctx, struct mail_thread_child_node *child) { - const struct mail_thread_node *node; int tz; - node = mail_hash_lookup_idx(ctx->hash_trans, child->idx); - i_assert(node->uid != 0 && node->exists); - child->uid = node->uid; + child->uid = thread_lookup_existing(ctx, child->idx); if (!mail_set_uid(ctx->tmp_mail, child->uid)) { /* the UID should have existed. we would have rebuild @@ -164,7 +173,6 @@ ARRAY_TYPE(mail_thread_child_node) *sorted_children) { const struct mail_thread_shadow_node *shadows; - const struct mail_thread_node *node; struct mail_thread_child_node child, *children; unsigned int count; @@ -177,9 +185,7 @@ i_assert(child.idx != 0); if (shadows[child.idx].next_sibling_idx == 0) { /* only child - don't bother setting sort date */ - node = mail_hash_lookup_idx(ctx->hash_trans, child.idx); - i_assert(node->uid != 0 && node->exists); - child.uid = node->uid; + child.uid = thread_lookup_existing(ctx, child.idx); array_append(sorted_children, &child, 1); return 0; @@ -202,12 +208,11 @@ { struct subject_gather_context gather_ctx; struct mail_thread_root_node *roots; - const struct mail_thread_node *node; const char *subject; unsigned int i, count; ARRAY_TYPE(mail_thread_child_node) sorted_children; const struct mail_thread_child_node *children; - uint32_t idx; + uint32_t idx, uid; int ret = 0; memset(&gather_ctx, 0, sizeof(gather_ctx)); @@ -240,10 +245,8 @@ continue; } - node = mail_hash_lookup_idx(ctx->hash_trans, idx); - i_assert(node->uid != 0 && node->exists); - - if (!mail_set_uid(ctx->tmp_mail, node->uid)) { + uid = thread_lookup_existing(ctx, idx); + if (!mail_set_uid(ctx->tmp_mail, uid)) { /* the UID should have existed. we would have rebuild the thread tree otherwise. */ mail_hash_transaction_set_corrupted(ctx->hash_trans, @@ -443,7 +446,8 @@ roots = array_get_modifiable(&ctx->roots, &root_count); for (idx = 1; idx < record_count; idx++) { node = mail_hash_lookup_idx(ctx->hash_trans, idx); - if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) || !node->exists) + if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) || + !MAIL_INDEX_NODE_EXISTS(node)) continue; child.idx = idx; @@ -467,14 +471,17 @@ return 0; } -static void mail_thread_create_shadows(struct thread_finish_context *ctx, - uint32_t record_count) +static int mail_thread_create_shadows(struct thread_finish_context *ctx, + uint32_t record_count) { + struct mail_thread_list_update_context *thread_list_ctx; struct mail_thread_node *node, *parent; struct mail_thread_root_node root; struct mail_thread_child_node child; + struct mail_hash *hash; uint8_t *referenced; uint32_t idx, i, j, parent_idx, alloc_size, max; + int ret = 0; ctx->use_sent_date = FALSE; @@ -485,26 +492,42 @@ referenced = i_new(uint8_t, alloc_size); for (idx = 1; idx < record_count; idx++) { node = mail_hash_lookup_idx(ctx->hash_trans, idx); - if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) + if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) { + /* @UNSAFE: mark deleted records as referenced, so we + don't waste time with them */ + referenced[idx / 8] |= 1 << (idx % 8); continue; + } - if (node->exists) { + if (MAIL_INDEX_NODE_EXISTS(node)) { /* @UNSAFE: don't remove existing nodes */ referenced[idx / 8] |= 1 << (idx % 8); } if (node->parent_idx == 0) { + /* root node - add to roots list */ root.node.idx = idx; - root.node.uid = node->uid; - root.dummy = !node->exists; + if (!MAIL_INDEX_NODE_EXISTS(node)) { + root.dummy = TRUE; + root.node.uid = 0; + } else { + root.dummy = FALSE; + root.node.uid = node->uid_or_id; + } array_append(&ctx->roots, &root, 1); continue; } + if (node->parent_idx >= record_count) { + mail_hash_transaction_set_corrupted(ctx->hash_trans, + "parent_idx too large"); + ret = -1; + break; + } /* @UNSAFE: keep track of referenced nodes */ referenced[node->parent_idx / 8] |= 1 << (node->parent_idx % 8); - if (!node->exists) { + if (!MAIL_INDEX_NODE_EXISTS(node)) { /* dummy node */ continue; } @@ -514,7 +537,8 @@ parents, add it as the highest dummy's child. */ parent_idx = node->parent_idx; parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx); - while (!parent->exists && parent->parent_idx != 0) { + while (!MAIL_INDEX_NODE_EXISTS(parent) && + parent->parent_idx != 0) { parent_idx = parent->parent_idx; parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx); @@ -523,22 +547,40 @@ } /* remove unneeded records from hash */ - for (i = 1; i < alloc_size; i++) { + hash = mail_hash_transaction_get_hash(ctx->hash_trans); + if (ret < 0 || mail_hash_get_lock_type(hash) != F_WRLCK) { + /* thread index isn't locked, can't delete anything */ + i_free(referenced); + return ret; + } + + /* FIXME: we could remove everything from thread list by pointing to + existing messages' References: headers */ + thread_list_ctx = mail_thread_list_update_begin(ctx->hash_list_ctx, + ctx->hash_trans); + for (i = 0; i < alloc_size; i++) { if (referenced[i] == 0xff) continue; + j = i == 0 ? 1 : 0; max = i+1 < alloc_size ? 8 : (record_count % 8); - for (j = 0; j < max; j++) { + for (; j < max; j++) { if ((referenced[i] & (1 << j)) != 0) continue; idx = i*8 + j; node = mail_hash_lookup_idx(ctx->hash_trans, idx); + + if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) { + mail_thread_list_remove(thread_list_ctx, + node->uid_or_id); + } mail_hash_remove(ctx->hash_trans, idx, node->msgid_crc32); } } i_free(referenced); + return mail_thread_list_commit(&thread_list_ctx); } static int mail_thread_finish(struct thread_finish_context *ctx, @@ -556,7 +598,8 @@ /* make sure all shadow indexes are accessible directly. */ (void)array_idx_modifiable(&ctx->shadow_nodes, hdr->record_count); - mail_thread_create_shadows(ctx, hdr->record_count); + if (mail_thread_create_shadows(ctx, hdr->record_count) < 0) + return -1; /* (4) */ ctx->use_sent_date = TRUE; @@ -588,11 +631,10 @@ nodes_change_uids_to_seqs(struct mail_thread_iterate_context *iter, bool root) { struct mail_thread_child_node *children; - struct mailbox *box; + struct mailbox *box = iter->ctx->tmp_mail->box; unsigned int i, count; uint32_t uid, seq; - box = mailbox_transaction_get_mailbox(iter->ctx->tmp_mail->transaction); children = array_get_modifiable(&iter->children, &count); for (i = 0; i < count; i++) { uid = children[i].uid; @@ -656,21 +698,27 @@ struct mail_thread_iterate_context * mail_thread_iterate_init_full(struct mail *tmp_mail, struct mail_hash_transaction *hash_trans, + struct mail_thread_list_context *hash_list_ctx, enum mail_thread_type thread_type, bool return_seqs) { struct mail_thread_iterate_context *iter; struct thread_finish_context *ctx; + struct mail_hash *hash; iter = i_new(struct mail_thread_iterate_context, 1); ctx = iter->ctx = i_new(struct thread_finish_context, 1); ctx->refcount = 1; ctx->tmp_mail = tmp_mail; ctx->hash_trans = hash_trans; + ctx->hash_list_ctx = hash_list_ctx; ctx->return_seqs = return_seqs; if (mail_thread_finish(ctx, thread_type) < 0) iter->failed = TRUE; else { + hash = mail_hash_transaction_get_hash(hash_trans); + if (mail_hash_get_lock_type(hash) == F_WRLCK) + (void)mail_hash_transaction_write(hash_trans); mail_thread_iterate_fill_root(iter); if (return_seqs) nodes_change_uids_to_seqs(iter, TRUE); @@ -713,10 +761,7 @@ if (ret < 0) { struct mail *mail = iter->ctx->tmp_mail; - struct mailbox *box; - - box = mailbox_transaction_get_mailbox(mail->transaction); - mail_storage_set_internal_error(box->storage); + mail_storage_set_internal_error(mail->box->storage); } if (--iter->ctx->refcount == 0) { array_free(&iter->ctx->roots);
--- a/src/lib-storage/index/index-thread-links.c Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-storage/index/index-thread-links.c Thu Jun 26 19:11:56 2008 +0300 @@ -8,13 +8,15 @@ static struct mail_thread_node * thread_msgid_get(struct mail_thread_update_context *ctx, uint32_t ref_uid, - const char *msgid, uint32_t *idx_r) + unsigned int ref_index, const char *msgid, uint32_t *idx_r) { struct mail_thread_node *node, new_node; struct msgid_search_key key; const char **msgidp; uint32_t idx; + i_assert(ref_index != MAIL_INDEX_NODE_REF_MSGID); + key.msgid = msgid; key.msgid_crc32 = crc32_str_nonzero(msgid); @@ -23,20 +25,18 @@ /* not found, create */ memset(&new_node, 0, sizeof(new_node)); new_node.msgid_crc32 = key.msgid_crc32; - new_node.uid = ref_uid; + if (ref_index <= MAIL_INDEX_NODE_REF_MAX_VALUE) { + new_node.uid_or_id = ref_uid; + new_node.ref_index = ref_index; + } else { + new_node.uid_or_id = + mail_thread_list_add(ctx->thread_list_ctx, + msgid); + new_node.ref_index = MAIL_INDEX_NODE_REF_EXT; + } mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx); node = mail_hash_lookup_idx(ctx->hash_trans, idx); - } else if (node->uid == 0 && ref_uid != 0) { - /* make non-existing node uniquely identifiable */ - if (node->exists) { - mail_hash_transaction_set_corrupted(ctx->hash_trans, - "uid=0 found"); - ctx->failed = TRUE; - } else { - node->uid = ref_uid; - mail_hash_update(ctx->hash_trans, idx); - } } /* keep message-ids cached */ @@ -55,11 +55,17 @@ struct mail_thread_node unode; if (msgid != NULL) { - node = thread_msgid_get(ctx, 0, msgid, idx_r); - if (!node->exists) { + node = thread_msgid_get(ctx, 0, 0, msgid, idx_r); + if (!MAIL_INDEX_NODE_EXISTS(node)) { /* add UID to node */ - node->uid = uid; - node->exists = TRUE; + if (node->ref_index == MAIL_INDEX_NODE_REF_EXT && + node->uid_or_id != 0) { + mail_thread_list_remove(ctx->thread_list_ctx, + node->uid_or_id); + } + + node->uid_or_id = uid; + node->ref_index = MAIL_INDEX_NODE_REF_MSGID; } else { /* duplicate, keep the original. if the original ever gets expunged, rebuild. */ @@ -72,8 +78,8 @@ if (msgid == NULL) { /* no valid message-id */ memset(&unode, 0, sizeof(unode)); - unode.uid = uid; - unode.exists = TRUE; + unode.uid_or_id = uid; + unode.ref_index = MAIL_INDEX_NODE_REF_MSGID; mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r); } } @@ -100,7 +106,7 @@ parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx); child = mail_hash_lookup_idx(ctx->hash_trans, child_idx); - child->link_refcount++; + child->parent_link_refcount++; mail_hash_update(ctx->hash_trans, child_idx); if (thread_node_has_ancestor(ctx, parent, child)) { @@ -144,7 +150,7 @@ child->parent_idx = parent_idx; else { /* Conflicting parent already exists, keep the original */ - if (child->exists) { + if (MAIL_INDEX_NODE_EXISTS(child)) { /* If this message gets expunged, the parent is changed. */ child->expunge_rebuilds = TRUE; @@ -166,27 +172,56 @@ mail_hash_update(ctx->hash_trans, child_idx); } -static void +struct thread_message_id { + const char *str; + uint32_t crc32; + unsigned int collisions_after; +}; + +static const char * thread_link_references(struct mail_thread_update_context *ctx, uint32_t ref_uid, - const char **references) + const char *references) { - const char *msgid, *last_msgid; + ARRAY_DEFINE(id_arr, struct thread_message_id); + struct thread_message_id *ids; + struct thread_message_id id; uint32_t parent_idx, child_idx; + unsigned int i, j, count, ref_index; - last_msgid = message_id_get_next(references); - if (last_msgid == NULL) - return; - (void)thread_msgid_get(ctx, ref_uid, last_msgid, &parent_idx); + /* put all message IDs to an array */ + memset(&id, 0, sizeof(id)); + t_array_init(&id_arr, 32); + while ((id.str = message_id_get_next(&references)) != NULL) { + id.crc32 = crc32_str_nonzero(id.str); + array_append(&id_arr, &id, 1); + } + + ids = array_get_modifiable(&id_arr, &count); + if (count <= 1) + return count == 0 ? NULL : ids[0].str; - while ((msgid = message_id_get_next(references)) != NULL) { - (void)thread_msgid_get(ctx, ref_uid, msgid, &child_idx); + /* count collisions */ + for (i = 0; i < count; i++) { + for (j = i + 1; j < count; j++) { + if (ids[i].crc32 == ids[j].crc32) + ids[i].collisions_after++; + } + } + + ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST + + ids[0].collisions_after; + thread_msgid_get(ctx, ref_uid, ref_index, ids[0].str, &parent_idx); + for (i = 1; i < count; i++) { + ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST + + ids[i].collisions_after; + thread_msgid_get(ctx, ref_uid, ref_index, + ids[i].str, &child_idx); thread_link_reference(ctx, parent_idx, child_idx); parent_idx = child_idx; - last_msgid = msgid; } /* link the last ID to us */ - *references = last_msgid; + return ids[count-1].str; } static int thread_get_mail_header(struct mail *mail, const char *name, @@ -203,34 +238,14 @@ return 0; } -static bool references_are_crc32_unique(const char *references) -{ - const char *msgid; - uint32_t msgid_crc32; - ARRAY_TYPE(uint32_t) crc_arr; - const uint32_t *crc; - unsigned int i, count; - - t_array_init(&crc_arr, 32); - while ((msgid = message_id_get_next(&references)) != NULL) { - msgid_crc32 = crc32_str_nonzero(msgid); - crc = array_get(&crc_arr, &count); - for (i = 0; i < count; i++) { - if (crc[i] == msgid_crc32) - return FALSE; - } - array_append(&crc_arr, &msgid_crc32, 1); - } - return TRUE; -} - int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail) { const char *message_id, *in_reply_to, *references, *parent_msgid; const struct mail_thread_node *parent, *old_parent; struct mail_hash_header *hdr; struct mail_thread_node *node; - uint32_t idx, parent_idx, ref_uid; + uint32_t idx, parent_idx; + unsigned int ref_index; hdr = mail_hash_get_header(ctx->hash_trans); i_assert(mail->uid > hdr->last_uid); @@ -241,21 +256,22 @@ thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0) return -1; - ref_uid = references_are_crc32_unique(references) ? mail->uid : 0; thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx); - thread_link_references(ctx, ref_uid, &references); + parent_msgid = thread_link_references(ctx, mail->uid, references); - if (references != NULL) - parent_msgid = references; + if (parent_msgid != NULL) + ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST; else { /* no valid IDs in References:, use In-Reply-To: instead */ if (thread_get_mail_header(mail, HDR_IN_REPLY_TO, &in_reply_to) < 0) return -1; parent_msgid = message_id_get_next(&in_reply_to); + ref_index = MAIL_INDEX_NODE_REF_INREPLYTO; } parent = parent_msgid == NULL ? NULL : - thread_msgid_get(ctx, ref_uid, parent_msgid, &parent_idx); + thread_msgid_get(ctx, mail->uid, ref_index, + parent_msgid, &parent_idx); node = mail_hash_lookup_idx(ctx->hash_trans, idx); old_parent = node->parent_idx == 0 ? NULL : @@ -277,8 +293,10 @@ static bool mail_thread_node_lookup(struct mail_thread_update_context *ctx, uint32_t uid, - uint32_t *idx_r, struct mail_thread_node **node_r) + uint32_t *idx_r, const char **msgid_r, + struct mail_thread_node **node_r) { + struct mail_thread_node *node; struct msgid_search_key key; const char *msgids; int ret; @@ -295,14 +313,17 @@ return FALSE; key.msgid_crc32 = crc32_str_nonzero(key.msgid); - *node_r = mail_hash_lookup(ctx->hash_trans, &key, idx_r); - if (*node_r == NULL) + node = mail_hash_lookup(ctx->hash_trans, &key, idx_r); + if (node == NULL) return FALSE; - if ((*node_r)->uid != ctx->tmp_mail->uid) { + if (node->ref_index != MAIL_INDEX_NODE_REF_MSGID || + node->uid_or_id != uid) { /* duplicate Message-ID probably */ return FALSE; } + *msgid_r = key.msgid; + *node_r = node; return TRUE; } @@ -333,7 +354,8 @@ static bool thread_unref_msgid(struct mail_thread_update_context *ctx, - uint32_t parent_idx, uint32_t child_idx) + uint32_t ref_uid, uint32_t parent_idx, + const char *child_msgid, uint32_t child_idx) { struct mail_thread_node *parent, *child; @@ -342,24 +364,39 @@ return FALSE; child = mail_hash_lookup_idx(ctx->hash_trans, child_idx); - if (child->link_refcount == 0) { + if (child->parent_link_refcount == 0) { mail_hash_transaction_set_corrupted(ctx->hash_trans, "unexpected refcount=0"); return FALSE; } - child->link_refcount--; - if (child->link_refcount == 0) { + child->parent_link_refcount--; + if (child->parent_link_refcount == 0) { /* we don't have a root anymore */ child->parent_idx = 0; } + + if (child->uid_or_id == ref_uid && + child->ref_index != MAIL_INDEX_NODE_REF_EXT) { + child->uid_or_id = + mail_thread_list_add(ctx->thread_list_ctx, child_msgid); + child->ref_index = MAIL_INDEX_NODE_REF_EXT; + } mail_hash_update(ctx->hash_trans, child_idx); + + if (parent->uid_or_id == ref_uid && + parent->ref_index != MAIL_INDEX_NODE_REF_EXT) { + parent->uid_or_id = + mail_thread_list_add(ctx->thread_list_ctx, child_msgid); + parent->ref_index = MAIL_INDEX_NODE_REF_EXT; + mail_hash_update(ctx->hash_trans, parent_idx); + } return TRUE; } static bool -thread_unref_links(struct mail_thread_update_context *ctx, - uint32_t last_child_idx, const char *references, - bool *valid_r) +thread_unref_links(struct mail_thread_update_context *ctx, uint32_t ref_uid, + const char *last_child_msgid, uint32_t last_child_idx, + const char *references, bool *valid_r) { const char *msgid; uint32_t parent_idx, child_idx; @@ -378,22 +415,24 @@ while ((msgid = message_id_get_next(&references)) != NULL) { if (!thread_msgid_lookup(ctx, msgid, &child_idx) || - !thread_unref_msgid(ctx, parent_idx, child_idx)) + !thread_unref_msgid(ctx, ref_uid, parent_idx, + msgid, child_idx)) return FALSE; parent_idx = child_idx; } - return thread_unref_msgid(ctx, parent_idx, last_child_idx); + return thread_unref_msgid(ctx, ref_uid, parent_idx, + last_child_msgid, last_child_idx); } int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t uid) { struct mail_hash_header *hdr; struct mail_thread_node *node; - const char *references, *in_reply_to; + const char *msgid, *references, *in_reply_to; uint32_t idx, parent_idx; bool have_refs; - if (!mail_thread_node_lookup(ctx, uid, &idx, &node)) + if (!mail_thread_node_lookup(ctx, uid, &idx, &msgid, &node)) return 0; if (node->expunge_rebuilds) return 0; @@ -402,7 +441,7 @@ &references) < 0) return -1; - if (!thread_unref_links(ctx, idx, references, &have_refs)) + if (!thread_unref_links(ctx, uid, msgid, idx, references, &have_refs)) return 0; if (!have_refs) { /* no valid IDs in References:, use In-Reply-To: instead */ @@ -412,15 +451,16 @@ in_reply_to = message_id_get_next(&in_reply_to); if (in_reply_to != NULL && (!thread_msgid_lookup(ctx, in_reply_to, &parent_idx) || - !thread_unref_msgid(ctx, parent_idx, idx))) + !thread_unref_msgid(ctx, uid, parent_idx, + in_reply_to, idx))) return 0; } /* get the node again, the pointer may have changed */ node = mail_hash_lookup_idx(ctx->hash_trans, idx); - node->uid = 0; - node->exists = FALSE; + node->uid_or_id = mail_thread_list_add(ctx->thread_list_ctx, msgid); + node->ref_index = MAIL_INDEX_NODE_REF_EXT; mail_hash_update(ctx->hash_trans, idx); hdr = mail_hash_get_header(ctx->hash_trans);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-storage/index/index-thread-list.c Thu Jun 26 19:11:56 2008 +0300 @@ -0,0 +1,325 @@ +/* Copyright (c) 2008 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "array.h" +#include "str.h" +#include "bsearch-insert-pos.h" +#include "istream.h" +#include "ostream.h" +#include "index-storage.h" +#include "index-thread-private.h" + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> + +struct mail_thread_list_context { + struct mailbox *box; + char *path; + + uint32_t reset_counter; + ARRAY_TYPE(uint32_t) ids; + ARRAY_TYPE(const_string) msgids; + uint32_t last_id; + pool_t msgid_pool; + + unsigned int modified:1; +}; + +struct mail_thread_list_update_context { + struct mail_thread_list_context *ctx; + struct mail_hash_transaction *hash_trans; + + unsigned int refreshed:1; + unsigned int failed:1; +}; + +struct mail_thread_list_context *mail_thread_list_init(struct mailbox *box) +{ + struct index_mailbox *ibox = (struct index_mailbox *)box; + struct mail_thread_list_context *ctx; + + ctx = i_new(struct mail_thread_list_context, 1); + ctx->box = box; + ctx->path = i_strdup_printf("%s"MAIL_THREAD_INDEX_SUFFIX".ids", + ibox->index->filepath); + ctx->msgid_pool = + pool_alloconly_create(MEMPOOL_GROWING"thread msgid pool", 1024); + i_array_init(&ctx->ids, 32); + i_array_init(&ctx->msgids, 32); + return ctx; +} + +void mail_thread_list_deinit(struct mail_thread_list_context **_ctx) +{ + struct mail_thread_list_context *ctx = *_ctx; + + *_ctx = NULL; + array_free(&ctx->ids); + array_free(&ctx->msgids); + pool_unref(&ctx->msgid_pool); + i_free(ctx->path); + i_free(ctx); +} + +struct mail_thread_list_update_context * +mail_thread_list_update_begin(struct mail_thread_list_context *ctx, + struct mail_hash_transaction *hash_trans) +{ + struct mail_thread_list_update_context *update_ctx; + + update_ctx = i_new(struct mail_thread_list_update_context, 1); + update_ctx->ctx = ctx; + update_ctx->hash_trans = hash_trans; + + if (mail_hash_transaction_is_in_memory(hash_trans)) + update_ctx->refreshed = TRUE; + return update_ctx; +} + +static int uint32_cmp(const void *p1, const void *p2) +{ + const uint32_t *u1 = p1, *u2 = p2; + + return *u1 < *u2 ? -1 : (*u1 > *u2 ? 1 : 0); +} + +static int mail_thread_list_read(struct mail_thread_list_context *ctx) +{ + struct istream *input; + const char *line; + uint32_t id, prev_id; + int fd, ret = 0; + + array_clear(&ctx->ids); + array_clear(&ctx->msgids); + p_clear(ctx->msgid_pool); + + fd = open(ctx->path, O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 0; + mail_storage_set_critical(ctx->box->storage, + "open(%s) failed: %m", ctx->path); + return -1; + } + input = i_stream_create_fd(fd, (size_t)-1, FALSE); + if ((line = i_stream_read_next_line(input)) == NULL) + ret = -1; + else + ctx->reset_counter = strtoul(line, NULL, 10); + + prev_id = 0; + while ((line = i_stream_read_next_line(input)) != NULL) { + for (id = 0; *line >= '0' && *line <= '9'; line++) + id = id*10 + (*line - '0'); + if (*line != ' ' || id <= prev_id) { + ret = -1; + break; + } + prev_id = id; + line++; + + line = p_strdup(ctx->msgid_pool, line); + array_append(&ctx->ids, &id, 1); + array_append(&ctx->msgids, &line, 1); + } + ctx->last_id = prev_id; + if (input->stream_errno != 0) { + errno = input->stream_errno; + mail_storage_set_critical(ctx->box->storage, + "read(%s) failed: %m", ctx->path); + ret = -1; + } else if (ret < 0) { + mail_storage_set_critical(ctx->box->storage, + "Corrupted thread list file %s", ctx->path); + } + i_stream_unref(&input); + + if (close(fd) < 0) { + mail_storage_set_critical(ctx->box->storage, + "close(%s) failed: %m", ctx->path); + ret = -1; + } + return ret; +} + +static int mail_thread_list_refresh(struct mail_thread_list_update_context *ctx) +{ + struct mail_hash_header *hdr; + + if (ctx->refreshed) + return 0; + ctx->refreshed = TRUE; + + if (mail_thread_list_read(ctx->ctx) < 0) + return -1; + + hdr = mail_hash_get_header(ctx->hash_trans); + if (ctx->ctx->reset_counter != hdr->reset_counter) { + array_clear(&ctx->ctx->ids); + array_clear(&ctx->ctx->msgids); + p_clear(ctx->ctx->msgid_pool); + } + return 0; +} + +static inline bool +mail_thread_lookup_idx(struct mail_thread_list_update_context *ctx, + uint32_t id, unsigned int *idx_r) +{ + const uint32_t *ids; + unsigned int count; + + ids = array_get(&ctx->ctx->ids, &count); + return bsearch_insert_pos(&id, ids, I_MIN(count, id), sizeof(*ids), + uint32_cmp, idx_r); +} + +int mail_thread_list_lookup(struct mail_thread_list_update_context *ctx, + uint32_t id, const char **msgid_r) +{ + const char *const *msgid_p; + unsigned int idx; + + if (!mail_thread_lookup_idx(ctx, id, &idx)) { + if (mail_thread_list_refresh(ctx) < 0) + return -1; + if (!mail_thread_lookup_idx(ctx, id, &idx)) { + *msgid_r = NULL; + return 0; + } + } + + msgid_p = array_idx(&ctx->ctx->msgids, idx); + *msgid_r = *msgid_p; + return 1; +} + +uint32_t mail_thread_list_add(struct mail_thread_list_update_context *ctx, + const char *msgid) +{ + if (mail_thread_list_refresh(ctx) < 0) + ctx->failed = TRUE; + + msgid = p_strdup(ctx->ctx->msgid_pool, msgid); + ctx->ctx->modified = TRUE; + ctx->ctx->last_id++; + array_append(&ctx->ctx->ids, &ctx->ctx->last_id, 1); + array_append(&ctx->ctx->msgids, &msgid, 1); + return ctx->ctx->last_id; +} + +void mail_thread_list_remove(struct mail_thread_list_update_context *ctx, + uint32_t id) +{ + unsigned int idx; + + if (mail_thread_list_refresh(ctx) < 0) + return; + + if (!mail_thread_lookup_idx(ctx, id, &idx)) { + mail_storage_set_critical(ctx->ctx->box->storage, + "%s lost ID %u", ctx->ctx->path, id); + return; + } + + ctx->ctx->modified = TRUE; + array_delete(&ctx->ctx->ids, idx, 1); + array_delete(&ctx->ctx->msgids, idx, 1); +} + +static int mail_thread_list_write(struct mail_thread_list_context *ctx, + uint32_t reset_counter) +{ + struct index_mailbox *ibox = (struct index_mailbox *)ctx->box; + const uint32_t *ids; + const char *temp_path, *const *msgids; + unsigned int i, count; + struct ostream *output; + string_t *str; + int fd, ret = 0; + + ids = array_get(&ctx->ids, &count); + if (count == 0) { + /* everything deleted */ + if (unlink(ctx->path) < 0 && errno != ENOENT) + i_error("unlink(%s) failed: %m", ctx->path); + ctx->modified = FALSE; + return 0; + } + msgids = array_idx(&ctx->msgids, 0); + + /* write all IDs to .tmp file */ + temp_path = t_strconcat(ctx->path, ".tmp", NULL); + fd = open(temp_path, O_CREAT | O_TRUNC | O_WRONLY, ibox->index->mode); + if (fd == -1) { + mail_storage_set_critical(ctx->box->storage, + "creat(%s) failed: %m", temp_path); + return -1; + } + str = t_str_new(256); + output = o_stream_create_fd_file(fd, 0, FALSE); + str_printfa(str, "%u\n", reset_counter); + o_stream_send(output, str_data(str), str_len(str)); + + for (i = 0; i < count; i++) { + str_truncate(str, 0); + str_printfa(str, "%u %s\n", ids[i], msgids[i]); + o_stream_send(output, str_data(str), str_len(str)); + } + if (output->last_failed_errno != 0) { + errno = output->last_failed_errno; + mail_storage_set_critical(ctx->box->storage, + "write(%s) failed: %m", temp_path); + ret = -1; + } + o_stream_unref(&output); + if (close(fd) < 0) { + mail_storage_set_critical(ctx->box->storage, + "close(%s) failed: %m", temp_path); + ret = -1; + } + if (ret < 0) { + if (unlink(temp_path) < 0) + i_error("unlink(%s) failed: %m", temp_path); + return -1; + } + + /* finish the write by renaming the file */ + if (rename(temp_path, ctx->path) < 0) { + mail_storage_set_critical(ctx->box->storage, + "rename(%s, %s) failed: %m", temp_path, ctx->path); + return -1; + } + + ctx->modified = FALSE; + return 0; +} + +int mail_thread_list_commit(struct mail_thread_list_update_context **_ctx) +{ + struct mail_thread_list_update_context *ctx = *_ctx; + int ret = ctx->failed ? -1 : 0; + + *_ctx = NULL; + + if (ctx->ctx->modified && !ctx->failed && + !mail_hash_transaction_is_in_memory(ctx->hash_trans)) { + struct mail_hash_header *hdr; + + hdr = mail_hash_get_header(ctx->hash_trans); + ret = mail_thread_list_write(ctx->ctx, hdr->reset_counter); + } + i_free(ctx); + return ret; +} + +void mail_thread_list_rollback(struct mail_thread_list_update_context **_ctx) +{ + struct mail_thread_list_update_context *ctx = *_ctx; + + *_ctx = NULL; + i_free(ctx); +}
--- a/src/lib-storage/index/index-thread-private.h Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-storage/index/index-thread-private.h Thu Jun 26 19:11:56 2008 +0300 @@ -2,11 +2,14 @@ #define INDEX_THREAD_PRIVATE_H struct index_mailbox; +struct mail_thread_list_context; #include "crc32.h" #include "mail-hash.h" #include "mail-thread.h" +#define MAIL_THREAD_INDEX_SUFFIX ".thread" + #define HDR_MESSAGE_ID "message-id" #define HDR_IN_REPLY_TO "in-reply-to" #define HDR_REFERENCES "references" @@ -20,25 +23,56 @@ struct mail_thread_node { struct mail_hash_record rec; - /* exists=TRUE: UID of the mail this node belongs to - exists=FALSE: UID of some message that references (in References: or - In-Reply-To: header) this node. Of all the valid references exactly - one has the same CRC32 as this node's msgid_crc32. */ - uint32_t uid; - uint32_t parent_idx; + /* CRC32 checksum of this node's message ID (except CRC32 0 -> 1). + If there are more nodes with the same checksum, use uid and + ref_index fields to find the original string. */ uint32_t msgid_crc32; + /* ref_index=0: ID in external thread IDs list (beginning from 1) + ref_index=1: UID of the mail whose Message-ID: header points to + this node. + ref_index>1: UID of some message which has references to this + node. ref_index spcifies how the reference string is found. */ + uint32_t uid_or_id; + /* Index for this node's parent node */ + unsigned int parent_idx:27; + /* 0 = uid contains the index to the wanted Message ID in external + thread IDs list + 1 = Message-ID: header's first valid ID + 2 = In-Reply-To: header's first valid ID + 3 = References: header's last ID with the same CRC32 + 4 = References: header's second last ID with the same CRC32 + (i.e. one non-matching ID already had a CRC32 collision) + .. + 31 = References: header's 29th last ID with the same CRC32 + */ + unsigned int ref_index:5; + /* Number of messages containing "this message" -> "parent message" + link, i.e. "number of links to parent node". However since parents + can change, not all of these references might be from our current + child nodes. When this refcount reaches 0, it means we must detach + from our parent. */ + unsigned int parent_link_refcount:30; + /* If uid is expunged, rebuild the thread tree. */ + unsigned int expunge_rebuilds:1; + /* If a link between this node and its child gets unreferenced, + rebuild the thread tree. */ + unsigned int parent_unref_rebuilds:1; +}; +#define MAIL_INDEX_NODE_REF_EXT 0 +#define MAIL_INDEX_NODE_REF_MSGID 1 +#define MAIL_INDEX_NODE_REF_INREPLYTO 2 +#define MAIL_INDEX_NODE_REF_REFERENCES_LAST 3 +#define MAIL_INDEX_NODE_REF_MAX_VALUE 31 - uint32_t link_refcount:29; - uint32_t expunge_rebuilds:1; - uint32_t parent_unref_rebuilds:1; - uint32_t exists:1; -}; +#define MAIL_INDEX_NODE_EXISTS(node) \ + ((node)->ref_index == MAIL_INDEX_NODE_REF_MSGID) struct mail_thread_update_context { struct mail *tmp_mail; struct mail_hash *hash; struct mail_hash_transaction *hash_trans; + struct mail_thread_list_update_context *thread_list_ctx; /* Hash record idx -> Message-ID */ ARRAY_DEFINE(msgid_cache, const char *); @@ -64,9 +98,26 @@ struct mail_thread_iterate_context * mail_thread_iterate_init_full(struct mail *tmp_mail, struct mail_hash_transaction *hash_trans, + struct mail_thread_list_context *hash_list_ctx, enum mail_thread_type thread_type, bool return_seqs); void index_thread_mailbox_index_opened(struct index_mailbox *ibox); +struct mail_thread_list_context * +mail_thread_list_init(struct mailbox *box); +void mail_thread_list_deinit(struct mail_thread_list_context **ctx); + +struct mail_thread_list_update_context * +mail_thread_list_update_begin(struct mail_thread_list_context *ctx, + struct mail_hash_transaction *hash_trans); +int mail_thread_list_lookup(struct mail_thread_list_update_context *ctx, + uint32_t id, const char **msgid_r); +uint32_t mail_thread_list_add(struct mail_thread_list_update_context *ctx, + const char *msgid); +void mail_thread_list_remove(struct mail_thread_list_update_context *ctx, + uint32_t id); +int mail_thread_list_commit(struct mail_thread_list_update_context **ctx); +void mail_thread_list_rollback(struct mail_thread_list_update_context **ctx); + #endif
--- a/src/lib-storage/index/index-thread.c Thu Jun 26 08:49:02 2008 +0300 +++ b/src/lib-storage/index/index-thread.c Thu Jun 26 19:11:56 2008 +0300 @@ -35,6 +35,8 @@ union mailbox_module_context module_ctx; struct mail_hash *hash; + struct mail_thread_list_context *list_ctx; + /* set only temporarily while needed */ struct mail_thread_update_context *ctx; }; @@ -63,69 +65,87 @@ } static const char * -mail_thread_find_msgid_crc32(struct mail_thread_update_context *ctx, - uint32_t msgid_crc32) +mail_thread_nth_last_refid_crc32(const char *msgids, unsigned int ref_index, + uint32_t msgid_crc32) { - const char *msgids, *msgid; - int ret; + const unsigned int idx_from_end = + ref_index - MAIL_INDEX_NODE_REF_REFERENCES_LAST + 1; + const char **last_msgids, *msgid; + unsigned int idx = 0; - /* if there are any valid references, it's in them */ - ret = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES, &msgids); - if (ret == 0) { - /* no References: header, fallback to In-Reply-To: */ - if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO, - &msgids) <= 0) - return NULL; - - msgid = message_id_get_next(&msgids); - if (msgid != NULL && crc32_str_nonzero(msgid) == msgid_crc32) - return msgid; - return NULL; - } - if (ret < 0) - return NULL; - + last_msgids = t_new(const char *, idx_from_end); while ((msgid = message_id_get_next(&msgids)) != NULL) { if (crc32_str_nonzero(msgid) == msgid_crc32) { - /* found it. there aren't any colliding message-id - CRC32s in this message or it wouldn't have been - added as a reference UID. */ - return msgid; + last_msgids[idx % idx_from_end] = msgid; + idx++; } } - return NULL; - + return last_msgids[idx % idx_from_end]; } static const char * -mail_thread_node_get_msgid(struct mail_thread_update_context *ctx, +mail_thread_node_get_msgid(struct mail_hash_transaction *trans, + struct mail_thread_update_context *ctx, const struct mail_thread_node *node, uint32_t idx) { - const char *msgids, *msgid, **p; + const char *msgids, *msgid = NULL, **p; + int ret; + + if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) { + if (mail_thread_list_lookup(ctx->thread_list_ctx, + node->uid_or_id, &msgid) < 0) + return NULL; + if (msgid == NULL) { + mail_hash_transaction_set_corrupted(trans, + "Referenced list msgid lost"); + } + return msgid; + } p = array_idx_modifiable(&ctx->msgid_cache, idx); if (*p != NULL) return *p; - if (node->uid == 0) + ret = mail_set_uid(ctx->tmp_mail, node->uid_or_id); + if (ret <= 0) { + if (ret == 0) { + mail_hash_transaction_set_corrupted(trans, + t_strdup_printf("Referenced UID %u lost", + node->uid_or_id)); + } return NULL; - - if (mail_set_uid(ctx->tmp_mail, node->uid) <= 0) - return NULL; - if (node->exists) { - /* we can get the Message-ID directly */ + } + switch (node->ref_index) { + case MAIL_INDEX_NODE_REF_MSGID: + /* Message-ID: header */ if (mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, - &msgids) <= 0) + &msgids) < 0) return NULL; - + msgid = message_id_get_next(&msgids); + break; + case MAIL_INDEX_NODE_REF_INREPLYTO: + /* In-Reply-To: header */ + if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO, + &msgids) < 0) + return NULL; msgid = message_id_get_next(&msgids); - } else { - /* find from a referencing message */ - msgid = mail_thread_find_msgid_crc32(ctx, node->msgid_crc32); + break; + default: + /* References: header */ + if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES, + &msgids) < 0) + return NULL; + msgid = mail_thread_nth_last_refid_crc32(msgids, + node->ref_index, + node->msgid_crc32); + break; } - if (msgid == NULL) + if (msgid == NULL) { + /* shouldn't have happened */ + mail_hash_transaction_set_corrupted(trans, "Message ID lost"); return NULL; + } *p = p_strdup(ctx->msgid_pool, msgid); return *p; @@ -139,6 +159,7 @@ struct mail_thread_update_context *ctx = tbox->ctx; const struct mail_thread_node *node; const char *msgid; + bool ret; node = mail_hash_lookup_idx(trans, idx); if (key_rec->msgid_crc32 != node->msgid_crc32) @@ -148,14 +169,19 @@ ctx->cmp_last_idx = idx; /* either a match or a collision, need to look closer */ - msgid = mail_thread_node_get_msgid(ctx, node, ctx->cmp_last_idx); - if (msgid == NULL) { - /* we couldn't figure out the Message-ID for whatever reason. - we'll need to fallback to rebuilding the whole thread */ - ctx->rebuild = TRUE; - return FALSE; - } - return strcmp(msgid, key_rec->msgid) == 0; + T_BEGIN { + msgid = mail_thread_node_get_msgid(trans, ctx, node, idx); + if (msgid != NULL) + ret = strcmp(msgid, key_rec->msgid) == 0; + else { + /* we couldn't figure out the Message-ID for whatever + reason. we'll need to fallback to rebuilding the + whole thread. */ + ctx->rebuild = TRUE; + ret = FALSE; + } + } T_END; + return ret; } static unsigned int mail_thread_hash_rec(const void *p) @@ -358,6 +384,9 @@ ctx->t = mailbox_transaction_begin(ctx->box, 0); ctx->search = mailbox_search_init(ctx->t, ctx->search_args, NULL); ctx->thread_ctx.tmp_mail = mail_alloc(ctx->t, 0, NULL); + ctx->thread_ctx.thread_list_ctx = + mail_thread_list_update_begin(tbox->list_ctx, + ctx->thread_ctx.hash_trans); hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans); count = status.messages < hdr->record_count ? 0 : @@ -465,6 +494,9 @@ { struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box); + if (ctx->thread_ctx.thread_list_ctx != NULL) + mail_thread_list_rollback(&ctx->thread_ctx.thread_list_ctx); + mail_hash_transaction_end(&ctx->thread_ctx.hash_trans); (void)mailbox_search_deinit(&ctx->search); @@ -500,8 +532,11 @@ mail_thread_iterate_init(struct mail_thread_context *ctx, enum mail_thread_type thread_type, bool write_seqs) { + struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box); + return mail_thread_iterate_init_full(ctx->thread_ctx.tmp_mail, ctx->thread_ctx.hash_trans, + tbox->list_ctx, thread_type, write_seqs); } @@ -526,6 +561,33 @@ return seq2 == hash_hdr->message_count; } +static void +mail_thread_expunge_handler_deinit(struct mail_thread_mailbox *tbox, + struct mail_thread_update_context *ctx) +{ + struct mailbox_transaction_context *t; + + t = ctx->tmp_mail->transaction; + + if (ctx->failed) + mail_thread_list_rollback(&ctx->thread_list_ctx); + else { + if (mail_thread_list_commit(&ctx->thread_list_ctx) < 0) + ctx->failed = TRUE; + } + if (!ctx->failed) + (void)mail_hash_transaction_write(ctx->hash_trans); + mail_hash_transaction_end(&ctx->hash_trans); + mail_hash_unlock(tbox->hash); + + mail_free(&ctx->tmp_mail); + /* don't commit. we're in the middle of syncing and + this transaction isn't marked as external. */ + (void)mailbox_transaction_rollback(&t); + array_free(&ctx->msgid_cache); + pool_unref(&ctx->msgid_pool); +} + static int mail_thread_expunge_handler(struct mail_index_sync_map_ctx *sync_ctx, uint32_t seq, const void *data, @@ -541,21 +603,8 @@ /* deinit */ if (!ctx->syncing) return 0; - if (ctx->hash != NULL) { - t = ctx->tmp_mail->transaction; - - if (!ctx->failed) - (void)mail_hash_transaction_write(ctx->hash_trans); - mail_hash_transaction_end(&ctx->hash_trans); - mail_hash_unlock(tbox->hash); - - mail_free(&ctx->tmp_mail); - /* don't commit. we're in the middle of syncing and - this transaction isn't marked as external. */ - (void)mailbox_transaction_rollback(&t); - array_free(&ctx->msgid_cache); - pool_unref(&ctx->msgid_pool); - } + if (ctx->hash != NULL) + mail_thread_expunge_handler_deinit(tbox, ctx); i_free_and_null(tbox->ctx); return 0; } @@ -575,6 +624,9 @@ ctx->hash = tbox->hash; ctx->hash_trans = mail_hash_transaction_begin(ctx->hash, 0); + ctx->thread_list_ctx = + mail_thread_list_update_begin(tbox->list_ctx, + ctx->hash_trans); ctx->msgid_pool = pool_alloconly_create(MEMPOOL_GROWING"msgids", 20 * APPROX_MSGID_SIZE); i_array_init(&ctx->msgid_cache, 20); @@ -610,6 +662,7 @@ i_assert(tbox->ctx == NULL); + mail_thread_list_deinit(&tbox->list_ctx); mail_hash_free(&tbox->hash); ret = tbox->module_ctx.super.close(box); i_free(tbox); @@ -626,7 +679,8 @@ tbox->module_ctx.super = box->v; box->v.close = mail_thread_mailbox_close; - tbox->hash = mail_hash_alloc(ibox->index, ".thread", + tbox->list_ctx = mail_thread_list_init(box); + tbox->hash = mail_hash_alloc(ibox->index, MAIL_THREAD_INDEX_SUFFIX, sizeof(struct mail_thread_node), mail_thread_hash_key, mail_thread_hash_rec,