Mercurial > dovecot > original-hg > dovecot-1.2
view src/imap/imap-thread.c @ 6089:a19931ec66db HEAD
Changed mail_transaction_begin() API to take flags parameter instead of two
booleans.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Thu, 19 Jul 2007 02:36:04 +0300 |
parents | 7a6db5ec047d |
children | ad1b948c5fa2 |
line wrap: on
line source
/* Copyright (C) 2002-2006 Timo Sirainen */ /* Implementation of draft-ietf-imapext-thread-12 threading algorithm Message threads are permanently stored using mail-hash API. The links are built according to normal threading rules. Base subject grouping however isn't saved, so it needs to be done the slow way. We do this permanent storing and optimization only when we're threading all messages, which is what pretty much all the webmails do. Otherwise we'd need to have separate thread trees for each search query, which isn't practical. The nodes are stored sorted by their sent_date/UID as specified by the threading rules, so no further sorting is required to be done. Dummy nodes are sorted by their children, and since their children are also sorted, it practically means that they need to be moved whenever their first child is changed. Adding new messages to the hash is easy and fast. Removing messages however might cause changes all over the thread tree. Luckily this is rare so we can just optimize the common cases. This is done with reference counts: A node is created for each seen Message-ID. We also store the dummy ones for which no actual message exists. Each time a Message-ID is seen in the In-Reply-To or References headers, the Message-ID node's reference count is increased. Expunging then decreases reference counts for each Message-ID. There are two rare problematic cases when expunging: 1) Duplicate Message-IDs 2) Broken parent-child relationships: a) different messages describe them differently in their References headers b) loops For these cases expunging a message affecting these may cause larger thread reorganizations. We mark these with expunge_rebuilds flag, so that if the problematic message is expunged, we'll just rebuild everything. When a message is expunged, either its reference count drops to zero in which case it's removed completely, otherwise it's turned into a dummy node. Typically when reference count drops to zero it means it the node has no children. One special case is however a thread with: [1:dummy] [2:dummy] [3:ref 1, 2] [4:ref 2] When 3 is removed, 1's refcount drops to zero but 2 is still referenced by 4. In this case the 2's parent must be updated. When we open the mail-hash, we check that no messages have been expunged from mailbox which haven't also been removed from the hash. If they have, we rebuild the thread tree. Otherwise we add the new messages to the hash and then send the results to client. */ #include "common.h" #include "array.h" #include "crc32.h" #include "ostream.h" #include "str.h" #include "message-id.h" #include "imap-base-subject.h" #include "mail-storage.h" #include "mail-search.h" #include "imap-thread.h" /* FIXME: the mailbox accessing API needs some cleaning up. we shouldn't be including all kinds of private headers */ #include "../lib-storage/index/index-storage.h" #include "mail-hash.h" #include "mail-index-private.h" #include "mail-index-sync-private.h" #include <stdlib.h> #define IMAP_THREAD_CONTEXT(obj) \ MODULE_CONTEXT(obj, imap_thread_storage_module) /* how much memory to allocate initially. these are very rough approximations. */ #define APPROX_MSG_EXTRA_COUNT 10 #define APPROX_MSGID_SIZE 45 /* Try to buffer this much data before sending it to output stream. */ #define OUTPUT_BUF_SIZE 2048 #define HDR_MESSAGE_ID "message-id" #define HDR_IN_REPLY_TO "in-reply-to" #define HDR_REFERENCES "references" #define HDR_SUBJECT "subject" struct msgid_rec { const char *msgid; uint32_t msgid_crc32; }; struct mail_thread_rec { struct mail_hash_record rec; uint32_t refcount:31; uint32_t expunge_rebuilds:1; uint32_t msgid_crc32; uint32_t sent_date; uint32_t parent_idx; uint32_t first_child_idx; uint32_t next_idx; }; struct mail_thread_moved_rec { struct mail_thread_rec rec; /* first_child_idx and its siblings are in moved_recs */ unsigned int moved_children:1; }; struct mail_thread_root_rec { uint32_t idx; uint32_t uid; uint32_t sent_date; /* a linked list of roots which belong to the same thread */ struct mail_thread_root_rec *next; /* another record has this record in its next-pointer. this record isn't a root anymore. */ unsigned int nonroot:1; /* idx points to moved_recs */ unsigned int moved:1; /* subject contained a Re: or Fwd: */ unsigned int reply:1; }; #define ROOT_REC_IS_DUMMY(rec) \ ((rec)->uid == 0) struct thread_context { struct mail_search_context *search_ctx; struct mailbox_transaction_context *t; struct mailbox *box; struct ostream *output; enum mail_thread_type thread_type; struct mail *tmp_mail; struct mail_thread_rec tmp_rec; struct mail_search_arg tmp_search_arg; struct mail_search_seqset seqset; /* Hash record idx -> Message-ID */ ARRAY_DEFINE(msgid_map, const char *); pool_t msgid_pool; struct mail_hash *msgid_hash; pool_t subject_pool; struct hash_table *subject_hash; ARRAY_DEFINE(roots, struct mail_thread_root_rec *); ARRAY_DEFINE(moved_recs, struct mail_thread_moved_rec); uint32_t *alt_dates; unsigned int cmp_match_count; uint32_t cmp_last_idx; unsigned int id_is_uid:1; unsigned int failed:1; }; struct imap_thread_mailbox { union mailbox_module_context module_ctx; struct mail_hash *msgid_hash; /* set only temporarily while needed */ struct thread_context *ctx; }; static void (*next_hook_mailbox_opened)(struct mailbox *box); static MODULE_CONTEXT_DEFINE_INIT(imap_thread_storage_module, &mail_storage_module_register); static void imap_thread_hash_init(struct mailbox *box, bool create); static int mail_thread_input(struct thread_context *ctx, struct mail *mail); static int mail_thread_finish(struct thread_context *ctx); static int unlink_child(struct thread_context *ctx, uint32_t child_idx, uint32_t new_parent_idx); static void mail_thread_deinit(struct imap_thread_mailbox *tbox, struct thread_context *ctx) { i_free(ctx->alt_dates); if (ctx->msgid_hash != tbox->msgid_hash) mail_hash_free(&ctx->msgid_hash); else mail_hash_unlock(ctx->msgid_hash); if (ctx->subject_hash != NULL) { hash_destroy(ctx->subject_hash); pool_unref(ctx->subject_pool); } array_free(&ctx->msgid_map); pool_unref(ctx->msgid_pool); } static uint32_t crc32_str_nonzero(const char *str) { uint32_t value = crc32_str(str); return value == 0 ? 1 : value; } static int mail_thread_rec_idx(struct thread_context *ctx, uint32_t idx, const struct mail_thread_rec **rec_r) { const void *value; if (mail_hash_lookup_idx(ctx->msgid_hash, idx, &value) < 0) { ctx->failed = TRUE; return -1; } *rec_r = value; return 0; } static unsigned int mail_thread_hash_key(const void *key) { const struct msgid_rec *key_rec = key; return key_rec->msgid_crc32; } static int mail_thread_find_child_msgid(struct thread_context *ctx, uint32_t parent_uid, uint32_t msgid_crc32, const char **msgid_r) { const char *msgids, *msgid, *found_msgid = NULL; int ret; if ((ret = mail_set_uid(ctx->tmp_mail, parent_uid)) < 0) return -1; if (ret == 0) { *msgid_r = NULL; return 0; } msgids = mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO); msgid = msgids == NULL ? NULL : message_id_get_next(&msgids); if (msgid != NULL) { if (crc32_str_nonzero(msgid) == msgid_crc32) found_msgid = msgid; } msgids = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES); if (msgids == NULL) { *msgid_r = found_msgid; return 0; } while ((msgid = message_id_get_next(&msgids)) != NULL) { if (crc32_str_nonzero(msgid) == msgid_crc32) { if (found_msgid != NULL && strcmp(found_msgid, msgid) != 0) { /* hash collisions, we can't figure this out */ return -1; } found_msgid = msgid; } } *msgid_r = found_msgid; return 0; } static const char * mail_thread_children_get_parent_msgid(struct thread_context *ctx, const struct mail_thread_rec *parent_rec) { const struct mail_thread_rec *child_rec; const char *msgid; uint32_t idx; for (idx = parent_rec->first_child_idx; idx != 0; ) { if (mail_thread_rec_idx(ctx, idx, &child_rec) < 0) return NULL; idx = child_rec->next_idx; if (child_rec->rec.uid == 0) { if (idx != 0) continue; /* only dummies in this level. go deeper. */ return mail_thread_children_get_parent_msgid(ctx, child_rec); } /* each non-dummy child must have a valid In-Reply-To or References header pointing to the parent, otherwise it wouldn't be our child */ if (parent_rec->msgid_crc32 == 0) { mail_hash_set_corrupted(ctx->msgid_hash, "msgid_crc32=0 node has children"); return NULL; } if (mail_thread_find_child_msgid(ctx, child_rec->rec.uid, parent_rec->msgid_crc32, &msgid) == 0) return msgid; } return NULL; } static const char * mail_thread_rec_get_msgid(struct thread_context *ctx, const struct mail_thread_rec *rec, uint32_t idx) { const char *msgids, *msgid, **p; p = array_idx_modifiable(&ctx->msgid_map, idx); if (*p != NULL) return *p; if (rec->rec.uid != 0) { /* we can get the Message-ID directly */ if (mail_set_uid(ctx->tmp_mail, rec->rec.uid) <= 0) return NULL; msgids = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID); if (msgids == NULL) return NULL; msgid = message_id_get_next(&msgids); } else { /* fallback to finding from children's references */ msgid = mail_thread_children_get_parent_msgid(ctx, rec); } if (msgid == NULL) return NULL; *p = p_strdup(ctx->msgid_pool, msgid); return *p; } static bool mail_thread_hash_cmp(const void *key, const void *data, struct imap_thread_mailbox *tbox) { struct thread_context *ctx = tbox->ctx; const struct msgid_rec *key_rec = key; const struct mail_thread_rec *rec = data; const char *msgid; if (key_rec->msgid_crc32 != rec->msgid_crc32) return FALSE; ctx->cmp_match_count++; ctx->cmp_last_idx = mail_hash_value_idx(ctx->msgid_hash, rec); /* either a match or a collision, need to look closer */ msgid = mail_thread_rec_get_msgid(ctx, rec, ctx->cmp_last_idx); if (msgid == NULL) { /* we couldn't figure out the Message-ID for whatever reason. we'll need to fallback to rebuilding the whole thread */ ctx->failed = TRUE; return FALSE; } return strcmp(msgid, key_rec->msgid) == 0; } static unsigned int mail_thread_hash_rec(const void *p) { const struct mail_thread_rec *rec = p; return rec->msgid_crc32; } static int resize_callback(struct mail_hash *tmp_hash, uint32_t first_changed_idx, const uint32_t *map, unsigned int map_size, void *context) { struct thread_context *ctx = context; const struct mail_hash_header *hdr; const struct mail_thread_rec *rec; struct mail_thread_rec tmp_rec; const void *value; uint32_t idx; hdr = mail_hash_get_header(tmp_hash); for (idx = first_changed_idx; idx <= hdr->record_count; idx++) { if (mail_hash_lookup_idx(tmp_hash, idx, &value) < 0) return -1; rec = value; i_assert(!MAIL_HASH_RECORD_IS_DELETED(&rec->rec)); if (rec->parent_idx >= map_size || rec->first_child_idx >= map_size || rec->next_idx >= map_size) { mail_hash_set_corrupted(ctx->msgid_hash, "invalid indexes"); return -1; } tmp_rec = *rec; tmp_rec.parent_idx = map[rec->parent_idx]; tmp_rec.first_child_idx = map[rec->first_child_idx]; tmp_rec.next_idx = map[rec->next_idx]; if (mail_hash_update_idx(tmp_hash, idx, &tmp_rec) < 0) return -1; } return 0; } static int imap_thread_context_init(struct imap_thread_mailbox *tbox, struct client *client, const char *charset, struct mail_search_arg *search_args) { struct thread_context *ctx = tbox->ctx; struct mailbox_status status; const struct mail_hash_header *hdr; unsigned int count; uint32_t last_seq = 0, last_uid = 0; if (mailbox_get_status(client->mailbox, STATUS_MESSAGES | STATUS_UIDNEXT, &status) < 0) return -1; last_seq = status.messages; last_uid = status.uidnext - 1; /* Each search condition requires their own separate thread index. Pretty much all the clients use only "search all" threading, so we don't need to worry about anything else. */ if (search_args->next != NULL) { /* too difficult to figure out if we could optimize this. we most likely couldn't. */ ctx->msgid_hash = NULL; } else if (search_args->type == SEARCH_ALL) { /* optimize */ } else if (search_args->type == SEARCH_SEQSET && search_args->value.seqset->seq1 == 1) { /* If we're searching 1..n, we might be able to optimize this. This is at least useful for testing incremental index updates if nothing else. :) */ last_seq = search_args->value.seqset->seq2; last_uid = 0; } else { ctx->msgid_hash = NULL; } if (ctx->msgid_hash != NULL) { if (mail_hash_lock(ctx->msgid_hash) <= 0) ctx->msgid_hash = NULL; } hdr = ctx->msgid_hash == NULL ? NULL : mail_hash_get_header(ctx->msgid_hash); if (hdr == NULL) { /* we want to build it in memory */ } else if (hdr->message_count > last_seq) { if (hdr->last_uid > last_uid) { /* view is a bit out of date, can't optimize */ mail_hash_unlock(ctx->msgid_hash); ctx->msgid_hash = NULL; } else { /* rebuild */ if (mail_hash_reset(ctx->msgid_hash, 0) < 0) ctx->msgid_hash = NULL; } } else if (hdr->last_uid != 0) { /* non-empty hash. add only the new messages in there. */ if (mailbox_get_uids(client->mailbox, 1, hdr->last_uid, &ctx->seqset.seq1, &ctx->seqset.seq2) < 0) { mail_hash_unlock(ctx->msgid_hash); return -1; } if (ctx->seqset.seq2 != hdr->message_count) { /* some messages have been expunged. have to rebuild. */ if (mail_hash_reset(ctx->msgid_hash, 0) < 0) ctx->msgid_hash = NULL; } else { /* after all these checks, this is the only case we can actually optimize. */ ctx->tmp_search_arg.type = SEARCH_SEQSET; if (ctx->seqset.seq2 == last_seq) { /* search nothing */ ctx->tmp_search_arg.value.seqset = NULL; } else { /* search next+1..n */ ctx->seqset.seq1 = ctx->seqset.seq2 + 1; ctx->seqset.seq2 = last_seq; ctx->tmp_search_arg.value.seqset = &ctx->seqset; } search_args = &ctx->tmp_search_arg; if (mail_hash_resize_if_needed(ctx->msgid_hash, last_seq - hdr->message_count, resize_callback, ctx) < 0) ctx->msgid_hash = NULL; } } if (ctx->msgid_hash == NULL) { /* fallback to using in-memory hash */ struct index_mailbox *ibox = (struct index_mailbox *)client->mailbox; ctx->msgid_hash = mail_hash_open(ibox->index, ".thread", MAIL_HASH_OPEN_FLAG_CREATE | MAIL_HASH_OPEN_FLAG_IN_MEMORY, sizeof(struct mail_thread_rec), 0, mail_thread_hash_key, mail_thread_hash_rec, mail_thread_hash_cmp, tbox); } /* initialize searching */ ctx->t = mailbox_transaction_begin(client->mailbox, 0); ctx->search_ctx = mailbox_search_init(ctx->t, charset, search_args, NULL); ctx->box = client->mailbox; ctx->output = client->output; /* at this point the hash is either locked or we're using in-memory hash where it doesn't matter */ hdr = mail_hash_get_header(ctx->msgid_hash); count = client->messages_count < hdr->record_count ? 0 : client->messages_count - hdr->record_count; count += APPROX_MSG_EXTRA_COUNT; ctx->msgid_pool = pool_alloconly_create("msgids", count * APPROX_MSGID_SIZE); i_array_init(&ctx->msgid_map, I_MAX(hdr->record_count, client->messages_count)); ctx->tmp_mail = mail_alloc(ctx->t, 0, NULL); return 0; } int imap_thread(struct client_command_context *cmd, const char *charset, struct mail_search_arg *args, enum mail_thread_type type) { static const char *wanted_headers[] = { HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES, HDR_SUBJECT, NULL }; struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(cmd->client->mailbox); struct mailbox_header_lookup_ctx *headers_ctx; struct thread_context *ctx; struct mail *mail; int ret, try; i_assert(type == MAIL_THREAD_REFERENCES || type == MAIL_THREAD_REFERENCES2); if (tbox->msgid_hash == NULL) imap_thread_hash_init(cmd->client->mailbox, TRUE); headers_ctx = mailbox_header_lookup_init(cmd->client->mailbox, wanted_headers); ctx = t_new(struct thread_context, 1); tbox->ctx = ctx; ctx->msgid_hash = tbox->msgid_hash; ctx->thread_type = type; for (try = 0; try < 2; try++) { ret = 0; if (imap_thread_context_init(tbox, cmd->client, charset, args) < 0) { ret = -1; break; } mail = mail_alloc(ctx->t, MAIL_FETCH_DATE, headers_ctx); ctx->id_is_uid = cmd->uid; while (ret == 0 && mailbox_search_next(ctx->search_ctx, mail) > 0) { t_push(); ret = mail_thread_input(ctx, mail); t_pop(); } mail_free(&mail); if (mail_thread_finish(ctx) < 0) ret = -1; ret = mailbox_search_deinit(&ctx->search_ctx); if (mailbox_transaction_commit(&ctx->t, 0) < 0) ret = -1; mail_thread_deinit(tbox, ctx); if (ctx->failed && ctx->msgid_hash == tbox->msgid_hash) { /* try again with in-memory hash */ memset(ctx, 0, sizeof(*ctx)); } else { break; } } tbox->ctx = NULL; mailbox_header_lookup_deinit(&headers_ctx); return ret; } static int mail_thread_children_update_parent(struct thread_context *ctx, const struct mail_thread_rec *parent_rec) { const struct mail_thread_rec *child_rec; struct mail_thread_rec tmp_rec; uint32_t idx; for (idx = parent_rec->first_child_idx; idx != 0; ) { if (mail_thread_rec_idx(ctx, idx, &child_rec) < 0) return -1; tmp_rec = *child_rec; tmp_rec.parent_idx = parent_rec->parent_idx; if (mail_hash_update_idx(ctx->msgid_hash, idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } idx = child_rec->next_idx; } return 0; } static int mail_thread_rec_unref_idx(struct thread_context *ctx, uint32_t idx, const char *msgid, bool is_parent) { const struct mail_thread_rec *rec; struct mail_thread_rec tmp_rec; struct msgid_rec key; if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; if (rec->refcount == 0 || (is_parent && rec->refcount == 1 && rec->rec.uid != 0)) { mail_hash_set_corrupted(ctx->msgid_hash, "refcount too low"); return -1; } if (rec->refcount == 1) { /* last reference to the node, remove it completely. it may however still have children, so update their parents */ if (mail_thread_children_update_parent(ctx, rec) < 0) return -1; if (rec->parent_idx != 0) { if (unlink_child(ctx, idx, 0) < 0) return -1; } key.msgid = msgid != NULL ? msgid : mail_thread_rec_get_msgid(ctx, rec, idx); if (key.msgid == NULL) return -1; key.msgid_crc32 = crc32_str_nonzero(key.msgid); if (mail_hash_remove_idx(ctx->msgid_hash, idx, &key) < 0) { ctx->failed = TRUE; return -1; } return 0; } else { tmp_rec = *rec; tmp_rec.refcount--; if (mail_hash_update_idx(ctx->msgid_hash, idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } return 1; } } static int mail_thread_rec_get_nondummy(struct thread_context *ctx, const struct mail_thread_rec *rec, const struct mail_thread_rec **nondummy_rec_r) { while (rec->rec.uid == 0 && rec->first_child_idx != 0) { if (mail_thread_rec_idx(ctx, rec->first_child_idx, &rec) < 0) return -1; } *nondummy_rec_r = rec; return 0; } static int mail_thread_rec_time_cmp(struct thread_context *ctx, const struct mail_thread_rec *rec1, const struct mail_thread_rec *rec2) { /* assume that rec2 is already non-dummy */ i_assert(rec2->rec.uid != 0 || rec2->first_child_idx == 0); if (rec1->rec.uid == 0) { if (mail_thread_rec_get_nondummy(ctx, rec1, &rec1) < 0) return 1; } /* use the sent date as long as both of the dates are valid */ if (rec1->sent_date != rec2->sent_date && rec1->sent_date != 0 && rec2->sent_date != 0) return rec1->sent_date < rec2->sent_date ? -1 : 1; /* otherwise fallback to comparing UIDs. put dummy records to end of list */ if (rec1->rec.uid == 0) return rec2->rec.uid != 0 ? 1 : 0; if (rec2->rec.uid == 0) return -1; return rec1->rec.uid < rec2->rec.uid ? -1 : 1; } static int update_next_idx(struct thread_context *ctx, uint32_t idx, uint32_t next_idx, uint32_t parent_idx) { struct mail_thread_rec tmp_rec; const struct mail_thread_rec *rec; if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; if (idx == parent_idx) { /* first child */ tmp_rec = *rec; tmp_rec.first_child_idx = next_idx; } else { /* sibling */ tmp_rec = *rec; tmp_rec.next_idx = next_idx; } return mail_hash_update_idx(ctx->msgid_hash, idx, &tmp_rec); } static int mail_thread_update_child_pos(struct thread_context *ctx, uint32_t child_idx, const struct mail_thread_rec *child_rec, const struct mail_thread_rec *parent_rec, bool remove_existing) { const struct mail_thread_rec *rec, *cmp_rec; struct mail_thread_rec tmp_rec; uint32_t idx, prev_idx, orig_next_idx; bool found = FALSE; if (child_rec->parent_idx == 0) { /* we're not sorting root nodes */ return 0; } if (parent_rec == NULL) { /* not given, have to look up */ if (mail_thread_rec_idx(ctx, child_rec->parent_idx, &parent_rec) < 0) return -1; } if (parent_rec->first_child_idx == 0) { /* this is the first child */ i_assert(!remove_existing); tmp_rec = *parent_rec; tmp_rec.first_child_idx = child_idx; if (mail_hash_update_idx(ctx->msgid_hash, child_rec->parent_idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } } else { /* find the position where we want it inserted */ prev_idx = child_rec->parent_idx; orig_next_idx = child_rec->next_idx; idx = parent_rec->first_child_idx; if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; if (mail_thread_rec_get_nondummy(ctx, child_rec, &cmp_rec) < 0) return -1; while (mail_thread_rec_time_cmp(ctx, rec, cmp_rec) < 0) { if (idx == child_idx) { /* unlink the child from here */ i_assert(remove_existing); if (found) return -1; found = TRUE; i_assert(rec->next_idx == orig_next_idx); if (update_next_idx(ctx, prev_idx, orig_next_idx, child_rec->parent_idx) < 0) return -1; } else { prev_idx = idx; } idx = rec->next_idx; if (idx == 0) break; if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; } if (idx == child_idx) { /* already in the right position */ i_assert(remove_existing); return found ? -1 : 0; } /* insert into this position */ if (update_next_idx(ctx, prev_idx, child_idx, child_rec->parent_idx) < 0) return -1; /* update the child's next_idx */ if (child_rec->next_idx != idx) { tmp_rec = *child_rec; tmp_rec.next_idx = idx; if (mail_hash_update_idx(ctx->msgid_hash, child_idx, &tmp_rec) < 0) return -1; } if (remove_existing && !found) { /* go through the rest and remove the existing child */ while (rec->next_idx != child_idx) { idx = rec->next_idx; if (idx == 0) { /* should have been found */ return -1; } if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; } if (update_next_idx(ctx, idx, orig_next_idx, child_rec->parent_idx) < 0) return -1; } } if (mail_thread_rec_idx(ctx, child_rec->parent_idx, &parent_rec) < 0) return -1; if (parent_rec->rec.uid != 0 || parent_rec->first_child_idx != child_idx) return 0; /* parent is a dummy and we updated its first child. that might move the parent */ return mail_thread_update_child_pos(ctx, child_rec->parent_idx, parent_rec, NULL, TRUE); } static int create_or_update_msg(struct thread_context *ctx, const char *msgid, time_t sent_date, uint32_t uid, uint32_t *idx_r) { struct msgid_rec key; struct mail_thread_rec rec; const struct mail_thread_rec *recp; const void *value; uint32_t idx; int ret; memset(&rec, 0, sizeof(rec)); rec.rec.uid = uid; rec.msgid_crc32 = msgid == NULL ? 0 : crc32_str_nonzero(msgid); rec.refcount = 1; rec.sent_date = sent_date; if (msgid == NULL) { if (mail_hash_insert(ctx->msgid_hash, NULL, &rec, &idx) < 0) { ctx->failed = TRUE; return -1; } *idx_r = idx; return 0; } key.msgid = msgid; key.msgid_crc32 = rec.msgid_crc32; ret = mail_hash_lookup(ctx->msgid_hash, &key, &value, &idx); if (ret < 0 || ctx->failed) { ctx->failed = TRUE; return -1; } if (ret == 0) { /* first time we see this message */ if (mail_hash_insert(ctx->msgid_hash, &key, &rec, &idx) < 0) { ctx->failed = TRUE; return -1; } msgid = p_strdup(ctx->msgid_pool, msgid); array_idx_set(&ctx->msgid_map, idx, &msgid); *idx_r = idx; return 0; } recp = value; if (recp->rec.uid == 0) { /* seen before in references */ const char **p; rec = *recp; rec.rec.uid = uid; rec.sent_date = sent_date; rec.refcount++; if (mail_hash_update_idx(ctx->msgid_hash, idx, &rec) < 0) { ctx->failed = TRUE; return -1; } if (mail_thread_update_child_pos(ctx, idx, &rec, NULL, TRUE) < 0) return -1; p = array_idx_modifiable(&ctx->msgid_map, idx); if (*p == NULL) *p = p_strdup(ctx->msgid_pool, msgid); } else { /* duplicate */ struct mail_thread_rec orig_rec; uint32_t orig_idx = idx; orig_rec = *recp; rec.msgid_crc32 = 0; if (mail_hash_insert(ctx->msgid_hash, NULL, &rec, &idx) < 0) { ctx->failed = TRUE; return -1; } /* if the original message gets expunged, the thread tree must be rebuilt. */ orig_rec.expunge_rebuilds = TRUE; if (mail_hash_update_idx(ctx->msgid_hash, orig_idx, &orig_rec) < 0) { ctx->failed = TRUE; return -1; } } *idx_r = idx; return 0; } static int unlink_child(struct thread_context *ctx, uint32_t child_idx, uint32_t new_parent_idx) { const struct mail_thread_rec *child_rec, *parent_rec, *sibling_rec; struct mail_thread_rec rec; uint32_t idx, parent_idx, sibling_idx; if (mail_thread_rec_idx(ctx, child_idx, &child_rec) < 0) return -1; if (child_rec->parent_idx == 0) return 0; parent_idx = child_rec->parent_idx; if (mail_thread_rec_idx(ctx, parent_idx, &parent_rec) < 0) return -1; /* find the node which links to this child */ sibling_idx = parent_idx; for (idx = parent_rec->first_child_idx; idx != child_idx; ) { i_assert(idx != 0); sibling_idx = idx; if (mail_thread_rec_idx(ctx, sibling_idx, &parent_rec) < 0) return -1; idx = parent_rec->next_idx; } if (mail_thread_rec_idx(ctx, sibling_idx, &sibling_rec) < 0) return -1; if (mail_thread_rec_idx(ctx, child_idx, &child_rec) < 0) return -1; /* remove from old parent's children list */ rec = *sibling_rec; if (sibling_idx != parent_idx) rec.next_idx = child_rec->next_idx; else rec.first_child_idx = child_rec->next_idx; i_assert(sibling_idx != rec.parent_idx); if (mail_hash_update_idx(ctx->msgid_hash, sibling_idx, &rec) < 0) { ctx->failed = TRUE; return -1; } if (parent_idx == sibling_idx && parent_rec->rec.uid == 0) { /* parent is a dummy and we removed its first child. this might move the parent */ if (mail_thread_update_child_pos(ctx, parent_idx, parent_rec, NULL, TRUE) < 0) return -1; } /* update the child's parent. it's added to new parent's children list elsewhere. since this node was originally elsewhere as a dummy, expunging this node will need to move it back there. */ rec = *child_rec; rec.parent_idx = new_parent_idx; rec.next_idx = 0; rec.expunge_rebuilds = TRUE; i_assert(child_idx != rec.parent_idx); if (mail_hash_update_idx(ctx->msgid_hash, child_idx, &rec) < 0) { ctx->failed = TRUE; return -1; } return 0; } static bool find_parent(struct thread_context *ctx, const struct mail_thread_rec *parent_rec, uint32_t child_idx) { while (parent_rec->parent_idx != 0) { if (parent_rec->parent_idx == child_idx) return TRUE; if (mail_thread_rec_idx(ctx, parent_rec->parent_idx, &parent_rec) < 0) return TRUE; } return FALSE; } static int mark_rebuild_with_parents(struct thread_context *ctx, uint32_t idx) { const struct mail_thread_rec *rec; struct mail_thread_rec tmp_rec; while (idx != 0) { if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return 1; if (!rec->expunge_rebuilds) { tmp_rec = *rec; tmp_rec.expunge_rebuilds = TRUE; if (mail_hash_update_idx(ctx->msgid_hash, idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } } idx = rec->parent_idx; } return 0; } static int msgid_ref_or_create(struct thread_context *ctx, const char *msgid, uint32_t *idx_r, const struct mail_thread_rec **rec_r) { struct msgid_rec key; struct mail_thread_rec tmp_rec; const void *value; int ret; key.msgid = msgid; key.msgid_crc32 = crc32_str_nonzero(msgid); ret = mail_hash_lookup(ctx->msgid_hash, &key, &value, idx_r); if (ret < 0 || ctx->failed) { ctx->failed = TRUE; return -1; } if (ret > 0) { if (rec_r != NULL) *rec_r = value; memcpy(&tmp_rec, value, sizeof(tmp_rec)); tmp_rec.refcount++; if (mail_hash_update_idx(ctx->msgid_hash, *idx_r, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } return 0; } /* not found, create */ memset(&ctx->tmp_rec, 0, sizeof(ctx->tmp_rec)); ctx->tmp_rec.msgid_crc32 = key.msgid_crc32; ctx->tmp_rec.refcount = 1; if (mail_hash_insert(ctx->msgid_hash, &key, &ctx->tmp_rec, idx_r) < 0) { ctx->failed = TRUE; return -1; } msgid = p_strdup(ctx->msgid_pool, msgid); array_idx_set(&ctx->msgid_map, *idx_r, &msgid); if (rec_r != NULL) *rec_r = &ctx->tmp_rec; return 0; } static int link_to_parent_msgid(struct thread_context *ctx, const char *parent_msgid, uint32_t child_idx, bool replace) { const struct mail_thread_rec *child_rec, *parent_rec; struct mail_thread_rec tmp_rec; uint32_t parent_idx; if (mail_thread_rec_idx(ctx, child_idx, &child_rec) < 0) return -1; /* create the msgid even if we don't use it. it's important because */ if (msgid_ref_or_create(ctx, parent_msgid, &parent_idx, &parent_rec) < 0) return -1; if (child_rec->parent_idx != 0 && !replace) { /* already got a parent, don't want to replace it. if the old parent gets expunged, we'll need a rebuild */ tmp_rec = *parent_rec; tmp_rec.expunge_rebuilds = TRUE; if (mail_hash_update_idx(ctx->msgid_hash, parent_idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } return 0; } /* look up again, since the pointer may have been invalidated if the parent_rec was inserted into hash */ if (mail_thread_rec_idx(ctx, child_idx, &child_rec) < 0) return -1; if (child_rec->parent_idx == parent_idx) { /* already have this parent, ignore */ return 0; } if (parent_idx == child_idx || find_parent(ctx, parent_rec, child_idx)) { if (ctx->failed) return -1; /* this would create a loop, not allowed. if any of the parents get expunged, the loop gets removed and we'll need a rebuild */ if (mark_rebuild_with_parents(ctx, parent_idx) < 0) return -1; return 0; } /* set new parent */ if (child_rec->parent_idx != 0) { if (unlink_child(ctx, child_idx, parent_idx) < 0) return -1; } else { tmp_rec = *child_rec; tmp_rec.parent_idx = parent_idx; i_assert(child_idx != tmp_rec.parent_idx); if (mail_hash_update_idx(ctx->msgid_hash, child_idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } } /* increase parent's refcount */ tmp_rec = *parent_rec; tmp_rec.refcount++; if (mail_hash_update_idx(ctx->msgid_hash, parent_idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } /* add to parent's child list */ return mail_thread_update_child_pos(ctx, child_idx, child_rec, parent_rec, FALSE); } static int link_message_ids(struct thread_context *ctx, const char *parent_msgid, const char *child_msgid) { uint32_t child_idx; if (msgid_ref_or_create(ctx, child_msgid, &child_idx, NULL) < 0) return -1; if (link_to_parent_msgid(ctx, parent_msgid, child_idx, FALSE) < 0) return -1; return 0; } static int link_references(struct thread_context *ctx, uint32_t child_idx, const char *references) { const char *parent_msgid, *child_msgid; parent_msgid = message_id_get_next(&references); if (parent_msgid == NULL) return 0; while ((child_msgid = message_id_get_next(&references)) != NULL) { if (link_message_ids(ctx, parent_msgid, child_msgid) < 0) return -1; parent_msgid = child_msgid; } /* link the last message to us */ if (link_to_parent_msgid(ctx, parent_msgid, child_idx, TRUE) < 0) return -1; return 1; } static int mail_thread_input(struct thread_context *ctx, struct mail *mail) { const char *refid, *message_id, *in_reply_to, *references; uint32_t idx; time_t sent_date; int ret; sent_date = mail_get_date(mail, NULL); if (sent_date == (time_t)-1) sent_date = 0; message_id = mail_get_first_header(mail, HDR_MESSAGE_ID); if (create_or_update_msg(ctx, message_id_get_next(&message_id), sent_date, mail->uid, &idx) < 0) return -1; /* link references */ references = mail_get_first_header(mail, HDR_REFERENCES); ret = link_references(ctx, idx, references); if (ret < 0) return -1; if (ret == 0) { in_reply_to = mail_get_first_header(mail, HDR_IN_REPLY_TO); refid = in_reply_to == NULL ? NULL : message_id_get_next(&in_reply_to); if (refid != NULL) { if (link_to_parent_msgid(ctx, refid, idx, TRUE) < 0) return -1; } else { /* no references, make sure it's not linked */ if (unlink_child(ctx, idx, 0) < 0) return -1; } } return 0; } static int mail_thread_rec_idx_moved(struct thread_context *ctx, uint32_t idx, bool *moved, const struct mail_thread_rec **rec_r) { const struct mail_thread_moved_rec *mrec; if (*moved) { mrec = array_idx(&ctx->moved_recs, idx); *rec_r = &mrec->rec; *moved = mrec->moved_children; return 0; } else { return mail_thread_rec_idx(ctx, idx, rec_r); } } static int get_first_nondummy_child(struct thread_context *ctx, const struct mail_thread_rec *rec, uint32_t *idx_r, bool children_moved) { uint32_t idx = rec->first_child_idx; bool sub_moved; int ret; while (idx != 0) { sub_moved = children_moved; if (mail_thread_rec_idx_moved(ctx, idx, &sub_moved, &rec) < 0) return -1; if (rec->rec.uid != 0) { *idx_r = idx; return 1; } if (rec->first_child_idx != 0) { ret = get_first_nondummy_child(ctx, rec, idx_r, sub_moved); if (ret != 0) return ret; } idx = rec->next_idx; } return 0; } static int have_nondummy_children(struct thread_context *ctx, const struct mail_thread_rec *rec, bool children_moved) { uint32_t idx; return get_first_nondummy_child(ctx, rec, &idx, children_moved); } static int get_next_nondummy_idx(struct thread_context *ctx, const struct mail_thread_rec *rec, bool moved, uint32_t *idx_r) { uint32_t idx = rec->next_idx; bool children_moved; int ret; while (idx != 0) { children_moved = moved; if (mail_thread_rec_idx_moved(ctx, idx, &children_moved, &rec) < 0) return -1; if (rec->rec.uid != 0) break; if (rec->first_child_idx != 0) { /* does it have non-dummy children? */ ret = have_nondummy_children(ctx, rec, children_moved); if (ret < 0) return -1; if (ret > 0) break; } idx = rec->next_idx; } *idx_r = idx; return 0; } static void add_base_subject(struct thread_context *ctx, const char *subject, struct mail_thread_root_rec *rec) { struct mail_thread_root_rec *hash_rec; char *hash_subject; void *key, *value; bool is_reply_or_forward; i_assert(rec->next == NULL); if (subject == NULL) return; subject = imap_get_base_subject_cased(pool_datastack_create(), subject, &is_reply_or_forward); if (*subject == '\0') return; if (!hash_lookup_full(ctx->subject_hash, subject, &key, &value)) { hash_subject = p_strdup(ctx->subject_pool, subject); hash_insert(ctx->subject_hash, hash_subject, rec); } else { hash_subject = key; hash_rec = value; if (!ROOT_REC_IS_DUMMY(hash_rec) && (ROOT_REC_IS_DUMMY(rec) || (hash_rec->reply && !is_reply_or_forward))) { rec->next = hash_rec; hash_rec->nonroot = TRUE; hash_update(ctx->subject_hash, hash_subject, rec); } else { while (hash_rec->next != NULL) hash_rec = hash_rec->next; hash_rec->next = rec; rec->nonroot = TRUE; } } rec->reply = is_reply_or_forward; } static int gather_base_subjects(struct thread_context *ctx) { struct mail_thread_root_rec **roots; const struct mail_thread_rec *rec; const char *subject; unsigned int i, count; uint32_t idx, uid; int ret; roots = array_get_modifiable(&ctx->roots, &count); ctx->subject_pool = pool_alloconly_create("base subjects", nearest_power(count * 20)); ctx->subject_hash = hash_create(default_pool, ctx->subject_pool, count * 2, str_hash, (hash_cmp_callback_t *)strcmp); for (i = 0; i < count; i++) { if (roots[i]->uid != 0) { idx = roots[i]->idx; uid = roots[i]->uid; } else { /* find the first non-dummy child */ if (mail_thread_rec_idx(ctx, roots[i]->idx, &rec) < 0) return -1; ret = get_first_nondummy_child(ctx, rec, &idx, FALSE); if (ret < 0) return -1; if (ret == 0) { /* a complete dummy thread, skip it */ continue; } if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; uid = rec->rec.uid; } if (mail_set_uid(ctx->tmp_mail, roots[i]->uid) > 0) { t_push(); subject = mail_get_first_header(ctx->tmp_mail, HDR_SUBJECT); add_base_subject(ctx, subject, roots[i]); t_pop(); } } return 0; } static int mail_thread_root_rec_time_cmp(struct mail_thread_root_rec *r1, struct mail_thread_root_rec *r2) { /* use the sent date as long as both of the dates are valid */ if (r1->sent_date != r2->sent_date && r1->sent_date != 0 && r2->sent_date != 0) return r1->sent_date < r2->sent_date ? -1 : 1; /* otherwise fallback to comparing UIDs */ return r1->uid < r2->uid ? -1 : 1; } static int mrec_add_sorted(struct thread_context *ctx, unsigned int parent_mrec_idx, struct mail_thread_moved_rec *child_mrec, uint32_t child_mrec_idx) { struct mail_thread_moved_rec *mrec, *parent_mrec; const struct mail_thread_rec *rec, *cmp_rec, *rec_nondummy; uint32_t idx, prev_idx = 0; bool children_moved; if (mail_thread_rec_get_nondummy(ctx, &child_mrec->rec, &cmp_rec) < 0) return -1; parent_mrec = array_idx_modifiable(&ctx->moved_recs, parent_mrec_idx); for (idx = parent_mrec->rec.first_child_idx; idx != 0; ) { children_moved = TRUE; if (mail_thread_rec_idx_moved(ctx, idx, &children_moved, &rec) < 0) return -1; if (mail_thread_rec_get_nondummy(ctx, rec, &rec_nondummy) < 0) return -1; if (mail_thread_rec_time_cmp(ctx, rec_nondummy, cmp_rec) > 0) break; prev_idx = idx; idx = rec->next_idx; } if (prev_idx == 0) { /* insert as first */ child_mrec->rec.next_idx = parent_mrec->rec.first_child_idx; parent_mrec->rec.first_child_idx = child_mrec_idx; } else { child_mrec->rec.next_idx = idx; mrec = array_idx_modifiable(&ctx->moved_recs, prev_idx); mrec->rec.next_idx = child_mrec_idx; } return 0; } static int mrec_add_root(struct thread_context *ctx, unsigned int parent_mrec_idx, const struct mail_thread_root_rec *parent_rrec) { const struct mail_thread_rec *rec; struct mail_thread_moved_rec *mrec; uint32_t mrec_idx; bool children_moved = parent_rrec->moved; if (mail_thread_rec_idx_moved(ctx, parent_rrec->idx, &children_moved, &rec) < 0) return -1; mrec_idx = array_count(&ctx->moved_recs); mrec = array_append_space(&ctx->moved_recs); mrec->rec = *rec; mrec->moved_children = children_moved; return mrec_add_sorted(ctx, parent_mrec_idx, mrec, mrec_idx); } static int mrec_add_children(struct thread_context *ctx, unsigned int parent_mrec_idx, const struct mail_thread_rec *parent_rec) { const struct mail_thread_rec *rec; struct mail_thread_moved_rec *mrec; uint32_t mrec_idx, idx; for (idx = parent_rec->first_child_idx; idx != 0; idx = rec->next_idx) { if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; mrec_idx = array_count(&ctx->moved_recs); mrec = array_append_space(&ctx->moved_recs); mrec->rec = *rec; if (mrec_add_sorted(ctx, parent_mrec_idx, mrec, mrec_idx) < 0) return -1; } return 0; } static int mail_thread_root_thread_merge(struct thread_context *ctx, struct mail_thread_root_rec *rrec) { struct mail_thread_root_rec *next; const struct mail_thread_rec *rec; struct mail_thread_moved_rec *mrec; uint32_t mrec_idx; if (mail_thread_rec_idx(ctx, rrec->idx, &rec) < 0) return -1; if (!ROOT_REC_IS_DUMMY(rrec)) { /* the record has the correct date already. the following messages that have the reply-flag set will be children of this record. */ next = rrec->next; if (next->reply) { /* move the parent */ mrec_idx = array_count(&ctx->moved_recs); mrec = array_append_space(&ctx->moved_recs); mrec->rec = *rec; mrec->rec.first_child_idx = 0; mrec->moved_children = TRUE; rrec->moved = TRUE; rrec->idx = mrec_idx; if (mrec_add_children(ctx, mrec_idx, rec) < 0) return -1; while (next != NULL && next->reply) { mrec_add_root(ctx, mrec_idx, next); next = next->next; } } /* if there are more messages, they'll be siblings to this record, and a dummy root will be added. */ if (next != NULL) { /* create dummy */ mrec_idx = array_count(&ctx->moved_recs); mrec = array_append_space(&ctx->moved_recs); mrec_add_root(ctx, mrec_idx, rrec); mrec->moved_children = TRUE; rrec->moved = TRUE; rrec->idx = mrec_idx; while (next != NULL) { mrec_add_root(ctx, mrec_idx, next); next = next->next; } } } else { /* add the rest of the records as children to this dummy */ mrec_idx = array_count(&ctx->moved_recs); mrec = array_append_space(&ctx->moved_recs); mrec->rec = *rec; mrec->rec.first_child_idx = 0; mrec->moved_children = TRUE; rrec->moved = TRUE; rrec->idx = mrec_idx; for (next = rrec->next; next != NULL; next = next->next) { if (!ROOT_REC_IS_DUMMY(next)) mrec_add_root(ctx, mrec_idx, next); else { if (mail_thread_rec_idx(ctx, next->idx, &rec) < 0) return -1; if (mrec_add_children(ctx, mrec_idx, rec) < 0) return -1; } } } return 0; } static int merge_subject_threads(struct thread_context *ctx) { struct mail_thread_root_rec **roots; unsigned int i, count; i_array_init(&ctx->moved_recs, 128); (void)array_append_space(&ctx->moved_recs); roots = array_get_modifiable(&ctx->roots, &count); for (i = 0; i < count; i++) { if (roots[i]->next != NULL && !roots[i]->nonroot) { if (mail_thread_root_thread_merge(ctx, roots[i]) < 0) return -1; } } return 0; } static int mail_thread_root_rec_sort_cmp(const void *p1, const void *p2) { struct mail_thread_root_rec *const *rp1 = p1, *const *rp2 = p2; struct mail_thread_root_rec *r1 = *rp1, *r2 = *rp2; /* move the nonroots to the end of the array. we just want to get rid of them. */ if (r1->nonroot) return r2->nonroot ? 0 : 1; if (r2->nonroot) return -1; return mail_thread_root_rec_time_cmp(r1, r2); } static void sort_root_nodes(struct thread_context *ctx) { struct mail_thread_root_rec **roots; unsigned int count; /* the root nodes contain the first non-dummy node's uid/sent_date, so we can compare them directly. the first non-dummy node is also the oldest one as the children lists are always kept sorted */ roots = array_get_modifiable(&ctx->roots, &count); qsort(roots, count, sizeof(*roots), mail_thread_root_rec_sort_cmp); } static void update_root_dates(struct thread_context *ctx) { struct mail_thread_root_rec **roots; unsigned int i, count; roots = array_get_modifiable(&ctx->roots, &count); for (i = 0; i < count; i++) roots[i]->sent_date = ctx->alt_dates[roots[i]->idx]; } static int str_add_id(struct thread_context *ctx, string_t *str, uint32_t uid) { if (!ctx->id_is_uid) { if (mailbox_get_uids(ctx->box, uid, uid, &uid, &uid) < 0) return -1; i_assert(uid != 0); } str_printfa(str, "%u", uid); return 0; } #define STR_NEED_FLUSH(str, extra) \ (str_len(str) + (extra) + MAX_INT_STRLEN*2 + 3 >= OUTPUT_BUF_SIZE) static int send_nodes(struct thread_context *ctx, string_t *str, uint32_t idx, bool moved) { const struct mail_thread_rec *rec; uint32_t next_idx; bool children_moved; int ret; /* FIXME: there could be some more sanity checks, for example verify that nodes are returned sorted */ children_moved = moved; if (mail_thread_rec_idx_moved(ctx, idx, &children_moved, &rec) < 0) return -1; if (get_next_nondummy_idx(ctx, rec, moved, &next_idx) < 0) return -1; if (next_idx == 0) { /* no siblings - special case to avoid extra paranthesis */ if (rec->rec.uid == 0) { /* dummy node, just skip this */ if (rec->first_child_idx != 0) { send_nodes(ctx, str, rec->first_child_idx, children_moved); } return 0; } if ((ret = have_nondummy_children(ctx, rec, children_moved)) < 0) return -1; if (str_add_id(ctx, str, rec->rec.uid) < 0) return -1; if (ret != 0) { str_append_c(str, ' '); send_nodes(ctx, str, rec->first_child_idx, children_moved); } return 0; } for (;;) { if (STR_NEED_FLUSH(str, 0)) { /* string getting full, flush it */ if (o_stream_send(ctx->output, str_data(str), str_len(str)) < 0) return -1; str_truncate(str, 0); } if ((ret = have_nondummy_children(ctx, rec, children_moved)) < 0) return -1; if (ret == 0) { /* only child */ if (rec->rec.uid != 0) { str_append_c(str, '('); if (str_add_id(ctx, str, rec->rec.uid) < 0) return -1; str_append_c(str, ')'); } } else if (rec->rec.uid == 0) { /* dummy with children */ str_append_c(str, '('); send_nodes(ctx, str, rec->first_child_idx, children_moved); str_append_c(str, ')'); } else { /* node with children */ str_append_c(str, '('); if (str_add_id(ctx, str, rec->rec.uid) < 0) return -1; str_append_c(str, ' '); send_nodes(ctx, str, rec->first_child_idx, children_moved); str_append_c(str, ')'); } if (get_next_nondummy_idx(ctx, rec, moved, &idx) < 0) return -1; if (idx == 0) break; children_moved = moved; if (mail_thread_rec_idx_moved(ctx, idx, &children_moved, &rec) < 0) return -1; } return 1; } static int send_root(struct thread_context *ctx, string_t *str, struct mail_thread_root_rec *root) { const struct mail_thread_rec *rec; const struct mail_thread_moved_rec *mrec; uoff_t orig_offset; bool moved = FALSE; if (STR_NEED_FLUSH(str, 1)) { /* string getting full, flush it */ if (o_stream_send(ctx->output, str_data(str), str_len(str)) < 0) return -1; str_truncate(str, 0); } if (root->moved) { mrec = array_idx(&ctx->moved_recs, root->idx); rec = &mrec->rec; moved = mrec->moved_children; } else { if (mail_thread_rec_idx(ctx, root->idx, &rec) < 0) return -1; } str_append_c(str, '('); orig_offset = ctx->output->offset + str_len(str); if (rec->rec.uid != 0) { if (str_add_id(ctx, str, rec->rec.uid) < 0) return -1; } if (rec->first_child_idx != 0) { if (rec->rec.uid != 0) str_append_c(str, ' '); if (send_nodes(ctx, str, rec->first_child_idx, moved) < 0) return -1; } if (ctx->output->offset + str_len(str) != orig_offset) str_append_c(str, ')'); else { /* just a bunch of dummy nodes */ str_truncate(str, str_len(str)-1); } return 0; } static int send_roots(struct thread_context *ctx) { struct mail_thread_root_rec *const *roots; unsigned int i, count; string_t *str; str = t_str_new(OUTPUT_BUF_SIZE); str_append(str, "* THREAD "); /* sort root nodes again, they have been modified since the last time */ sort_root_nodes(ctx); roots = array_get(&ctx->roots, &count); for (i = 0; i < count; i++) { if (roots[i]->nonroot) { /* nonroots are last in the list */ break; } if (send_root(ctx, str, roots[i]) < 0) return -1; } str_append(str, "\r\n"); (void)o_stream_send(ctx->output, str_data(str), str_len(str)); return 0; } static int update_altdates(struct thread_context *ctx, uint32_t idx, const struct mail_thread_rec *rec) { const struct mail_hash_header *hdr; uint32_t timestamp = rec->sent_date; hdr = mail_hash_get_header(ctx->msgid_hash); for (;;) { if (ctx->alt_dates[idx] < timestamp) { /* @UNSAFE */ ctx->alt_dates[idx] = timestamp; } idx = rec->parent_idx; if (idx == 0) break; if (idx > hdr->record_count) { mail_hash_set_corrupted(ctx->msgid_hash, "parent_idx too large"); return -1; } if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; } return 0; } static int mail_thread_finish(struct thread_context *ctx) { const struct mail_hash_header *hdr; const struct mail_thread_rec *rec; struct mail_thread_root_rec *root_rec; uint32_t idx; if (ctx->failed) return -1; hdr = mail_hash_get_header(ctx->msgid_hash); if (hdr->record_count == 0) return 0; /* (2) save root nodes */ i_array_init(&ctx->roots, I_MIN(128, hdr->record_count)); if (ctx->thread_type == MAIL_THREAD_REFERENCES2) ctx->alt_dates = i_new(uint32_t, hdr->record_count + 1); for (idx = 1; idx <= hdr->record_count; idx++) { if (mail_thread_rec_idx(ctx, idx, &rec) < 0) return -1; if (MAIL_HASH_RECORD_IS_DELETED(&rec->rec)) continue; if (rec->parent_idx == 0) { /* use the first non-dummy message's uid/sent_date so that the roots can be directly sorted */ if (mail_thread_rec_get_nondummy(ctx, rec, &rec) < 0) return -1; root_rec = p_new(ctx->msgid_pool, struct mail_thread_root_rec, 1); root_rec->idx = idx; root_rec->uid = rec->rec.uid; root_rec->sent_date = rec->sent_date; array_append(&ctx->roots, &root_rec, 1); } if (ctx->thread_type == MAIL_THREAD_REFERENCES2) update_altdates(ctx, idx, rec); } if (ctx->thread_type == MAIL_THREAD_REFERENCES2) { update_root_dates(ctx); i_free_and_null(ctx->alt_dates); } else { /* (4) */ sort_root_nodes(ctx); /* (5) Gather together messages under the root that have the same base subject text. */ if (gather_base_subjects(ctx) < 0) return -1; /* (5.C) Merge threads with the same thread subject. */ if (merge_subject_threads(ctx) < 0) return -1; } /* (6) Sort again and send replies */ t_push(); send_roots(ctx); t_pop(); return 0; } static int mail_thread_rec_from_seq(struct thread_context *ctx, uint32_t seq, struct msgid_rec *key_r, uint32_t *idx_r, const struct mail_thread_rec **rec_r) { const void *value; const char *message_id; if (mail_set_seq(ctx->tmp_mail, seq) < 0) return -1; message_id = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID); if (message_id == NULL) return 0; key_r->msgid = message_id_get_next(&message_id); if (key_r->msgid == NULL) return 0; key_r->msgid_crc32 = crc32_str_nonzero(key_r->msgid); if (mail_hash_lookup(ctx->msgid_hash, key_r, &value, idx_r) <= 0) return -1; *rec_r = value; if ((*rec_r)->rec.uid != ctx->tmp_mail->uid) { /* duplicate Message-ID probably */ return -1; } return 1; } static int mail_thread_unref_references(struct thread_context *ctx) { const char *references, *msgid; struct msgid_rec key; const void *value; uint32_t idx; int ret; references = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES); if (references == NULL) return 0; msgid = message_id_get_next(&references); if (msgid == NULL) return 0; t_push(); /* tmp_mail may be changed below, so we have to save the references string */ references = t_strdup(references); do { key.msgid = msgid; key.msgid_crc32 = crc32_str_nonzero(msgid); ctx->cmp_match_count = 0; ctx->cmp_last_idx = 0; ret = mail_hash_lookup(ctx->msgid_hash, &key, &value, &idx); if (ret < 0 || (ret == 0 && ctx->cmp_match_count != 1)) { ctx->failed = TRUE; break; } if (ret == 0) { /* there's only one key with this crc32 value, so it must be what we're looking for */ idx = ctx->cmp_last_idx; ctx->failed = FALSE; } if (mail_thread_rec_unref_idx(ctx, idx, msgid, FALSE) < 0) break; msgid = message_id_get_next(&references); } while (msgid != NULL); t_pop(); return msgid == NULL ? 1 : -1; } static int mail_thread_rec_turn_to_dummy(struct thread_context *ctx, const struct mail_thread_rec *rec) { struct mail_thread_rec tmp_rec; uint32_t idx; idx = mail_hash_value_idx(ctx->msgid_hash, rec); tmp_rec = *rec; tmp_rec.refcount--; tmp_rec.sent_date = 0; tmp_rec.rec.uid = 0; if (mail_hash_update_idx(ctx->msgid_hash, idx, &tmp_rec) < 0) { ctx->failed = TRUE; return -1; } if (rec->parent_idx != 0) { /* since our sent_date got removed, we may need to be moved */ if (mail_thread_update_child_pos(ctx, idx, rec, NULL, TRUE) < 0) return -1; } return 1; } static int imap_thread_expunge_handler(struct mail_index_sync_map_ctx *sync_ctx, uint32_t seq, const void *data __attr_unused__, void **sync_context __attr_unused__, void *context) { struct imap_thread_mailbox *tbox = context; struct thread_context *ctx = tbox->ctx; struct msgid_rec key; const struct mail_thread_rec *rec; uint32_t idx; int ret; if (ctx == NULL) { /* init */ struct mail_index_transaction *t; struct mailbox_transaction_context *mt; tbox->ctx = ctx = i_new(struct thread_context, 1); if (mail_hash_lock(tbox->msgid_hash) <= 0) return 0; t = mail_index_transaction_begin(sync_ctx->view, 0); mt = MAIL_STORAGE_CONTEXT(t); ctx->msgid_hash = tbox->msgid_hash; ctx->msgid_pool = pool_alloconly_create("msgids", 20 * APPROX_MSGID_SIZE); i_array_init(&ctx->msgid_map, 20); ctx->tmp_mail = mail_alloc(mt, 0, NULL); } else if (data == NULL) { /* deinit */ if (ctx->msgid_hash != NULL) { mail_hash_unlock(tbox->msgid_hash); mail_free(&ctx->tmp_mail); array_free(&ctx->msgid_map); pool_unref(ctx->msgid_pool); } i_free_and_null(tbox->ctx); return 0; } else { if (ctx->msgid_hash == NULL) { /* locking had failed */ return 0; } if (ctx->failed) return 0; } if (mail_thread_rec_from_seq(ctx, seq, &key, &idx, &rec) <= 0) return 0; ret = mail_thread_unref_references(ctx); if (ret < 0) return 0; if (ret == 0 && rec->parent_idx != 0) { /* We have a parent but no References header, so it means there's In-Reply-To. Don't bother verifying this, just unreference the parent. */ if (mail_thread_rec_unref_idx(ctx, rec->parent_idx, NULL, TRUE) < 0) return 0; } /* now unreference the expunged message itself */ if (rec->refcount == 1) { if (mail_thread_rec_unref_idx(ctx, idx, NULL, FALSE) < 0) return 0; } else { if (mail_thread_rec_turn_to_dummy(ctx, rec) <= 0) return 0; } return 0; } static int imap_thread_mailbox_close(struct mailbox *box) { struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(box); int ret; if (tbox->msgid_hash != NULL) mail_hash_free(&tbox->msgid_hash); ret = tbox->module_ctx.super.close(box); i_free(tbox); return ret; } static void imap_thread_hash_init(struct mailbox *box, bool create) { struct index_mailbox *ibox = (struct index_mailbox *)box; struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(box); uint32_t ext_id; i_assert(tbox->msgid_hash == NULL); tbox->msgid_hash = mail_hash_open(ibox->index, ".thread", create ? MAIL_HASH_OPEN_FLAG_CREATE : 0, sizeof(struct mail_thread_rec), 0, mail_thread_hash_key, mail_thread_hash_rec, mail_thread_hash_cmp, tbox); if (tbox->msgid_hash == NULL) return; ext_id = mail_index_ext_register(ibox->index, "thread", 0, 0, 0); mail_index_register_expunge_handler(ibox->index, ext_id, TRUE, imap_thread_expunge_handler, tbox); } static struct mailbox_sync_context * imap_thread_sync_init(struct mailbox *box, enum mailbox_sync_flags flags) { struct imap_thread_mailbox *tbox = IMAP_THREAD_CONTEXT(box); struct mailbox_sync_context *ctx; ctx = tbox->module_ctx.super.sync_init(box, flags); if (box->opened) { imap_thread_hash_init(box, FALSE); /* we don't want to get back here */ box->v.sync_init = tbox->module_ctx.super.sync_init; } return ctx; } static void imap_thread_mailbox_opened(struct mailbox *box) { struct imap_thread_mailbox *tbox; if (next_hook_mailbox_opened != NULL) next_hook_mailbox_opened(box); tbox = i_new(struct imap_thread_mailbox, 1); tbox->module_ctx.super = box->v; box->v.close = imap_thread_mailbox_close; MODULE_CONTEXT_SET(box, imap_thread_storage_module, tbox); if (box->opened) imap_thread_hash_init(box, FALSE); else { /* delayed opening used. we want to try to open the hash anyway, because if syncing expunges anything and we didn't notice it, we would have to rebuild the hash */ box->v.sync_init = imap_thread_sync_init; } } void imap_thread_init(void) { next_hook_mailbox_opened = hook_mailbox_opened; hook_mailbox_opened = imap_thread_mailbox_opened; } void imap_thread_deinit(void) { i_assert(hook_mailbox_opened == imap_thread_mailbox_opened); hook_mailbox_opened = next_hook_mailbox_opened; }