changeset 7795:56114dc2ccfc HEAD

mail_hash_update() updated wrong index.
author Timo Sirainen <tss@iki.fi>
date Sat, 17 May 2008 17:15:05 +0300
parents b2fefe24b849
children 37f23e0476e2
files src/lib-storage/index/index-thread-links.c
diffstat 1 files changed, 417 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/index/index-thread-links.c	Sat May 17 17:15:05 2008 +0300
@@ -0,0 +1,417 @@
+/* Copyright (c) 2002-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "message-id.h"
+#include "mail-storage.h"
+#include "index-thread-private.h"
+
+static struct mail_thread_node *
+thread_msgid_get(struct mail_thread_update_context *ctx, uint32_t ref_uid,
+		 const char *msgid, uint32_t *idx_r)
+{
+	struct mail_thread_node *node, new_node;
+	struct msgid_search_key key;
+	const char **msgidp;
+	uint32_t idx;
+
+	key.msgid = msgid;
+	key.msgid_crc32 = crc32_str_nonzero(msgid);
+
+	node = mail_hash_lookup(ctx->hash_trans, &key, &idx);
+	if (node == NULL) {
+		/* not found, create */
+		memset(&new_node, 0, sizeof(new_node));
+		new_node.msgid_crc32 = key.msgid_crc32;
+		new_node.uid = ref_uid;
+
+		mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx);
+		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+	} else if (node->uid == 0 && ref_uid != 0) {
+		/* make non-existing node uniquely identifiable */
+		if (node->exists) {
+			mail_hash_transaction_set_corrupted(ctx->hash_trans,
+							    "uid=0 found");
+			ctx->failed = TRUE;
+		} else {
+			node->uid = ref_uid;
+			mail_hash_update(ctx->hash_trans, idx);
+		}
+	}
+
+	/* keep message-ids cached */
+	msgidp = array_idx_modifiable(&ctx->msgid_cache, idx);
+	if (*msgidp == NULL)
+		*msgidp = p_strdup(ctx->msgid_pool, msgid);
+
+	*idx_r = idx;
+	return node;
+}
+
+static void thread_msg_add(struct mail_thread_update_context *ctx, uint32_t uid,
+			   const char *msgid, uint32_t *idx_r)
+{
+	struct mail_thread_node *node;
+	struct mail_thread_node unode;
+
+	if (msgid != NULL) {
+		node = thread_msgid_get(ctx, 0, msgid, idx_r);
+		if (!node->exists) {
+			/* add UID to node */
+			node->uid = uid;
+			node->exists = TRUE;
+		} else {
+			/* duplicate, keep the original. if the original ever
+			   gets expunged, rebuild. */
+			node->expunge_rebuilds = TRUE;
+			msgid = NULL;
+		}
+		mail_hash_update(ctx->hash_trans, *idx_r);
+	}
+
+	if (msgid == NULL) {
+		/* no valid message-id */
+		memset(&unode, 0, sizeof(unode));
+		unode.uid = uid;
+		unode.exists = TRUE;
+		mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r);
+	}
+}
+
+static bool thread_node_has_ancestor(struct mail_thread_update_context *ctx,
+				     const struct mail_thread_node *node,
+				     const struct mail_thread_node *ancestor)
+{
+	while (node != ancestor) {
+		if (node->parent_idx == 0)
+			return FALSE;
+
+		node = mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
+	}
+	return TRUE;
+}
+
+static void thread_link_reference(struct mail_thread_update_context *ctx,
+				  uint32_t parent_idx, uint32_t child_idx)
+{
+	struct mail_thread_node *node, *parent, *child;
+	uint32_t idx;
+
+	parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
+	child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
+
+	child->link_refcount++;
+	mail_hash_update(ctx->hash_trans, child_idx);
+
+	if (thread_node_has_ancestor(ctx, parent, child)) {
+		if (parent == child) {
+			/* loops to itself - ignore */
+			return;
+		}
+
+		/* child is an ancestor of parent. Adding child -> parent_node
+		   would introduce a loop. If any messages referencing the path
+		   between parent_node's parent and child_node get expunged, we
+		   have to rebuild the tree because the loop might break.
+		   For example:
+
+		     #1: a -> b       (a.ref=1, b.ref=1)
+		     #2: b -> a       (a.ref=2, b.ref=2)
+		     #3: c -> a -> b  (a.ref=3, b.ref=3, c.ref=1)
+
+		   Expunging #3 wouldn't break the loop, but expunging #1
+		   would. */
+		node = parent;
+		do {
+			idx = node->parent_idx;
+			if (idx == 0) {
+				/* earlier lookup_idx() failed */
+				ctx->failed = TRUE;
+				break;
+			}
+			node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+			node->parent_unref_rebuilds = TRUE;
+			mail_hash_update(ctx->hash_trans, idx);
+		} while (node != child);
+		return;
+	} else if (child->parent_idx == parent_idx) {
+		/* The same link already exists */
+		return;
+	}
+
+	/* Set parent_node as child_node's parent */
+	if (child->parent_idx == 0)
+		child->parent_idx = parent_idx;
+	else {
+		/* Conflicting parent already exists, keep the original */
+		if (child->exists) {
+			/* If this message gets expunged,
+			   the parent is changed. */
+			child->expunge_rebuilds = TRUE;
+		} else {
+			/* Message doesn't exist, so it was one of the node's
+			   children that created the original reference. If
+			   that reference gets dropped, the parent is changed.
+			   We could catch this in one of several ways:
+
+			    a) Link to parent node gets unreferenced
+			    b) Link to this node gets unreferenced
+			    c) Any of the child nodes gets expunged
+
+			   b) is probably the least likely to happen,
+			   so use it */
+			child->parent_unref_rebuilds = TRUE;
+		}
+	}
+	mail_hash_update(ctx->hash_trans, child_idx);
+}
+
+static void
+thread_link_references(struct mail_thread_update_context *ctx, uint32_t ref_uid,
+		       const char **references)
+{
+	const char *msgid, *last_msgid;
+	uint32_t parent_idx, child_idx;
+
+	last_msgid = message_id_get_next(references);
+	if (last_msgid == NULL)
+		return;
+	(void)thread_msgid_get(ctx, ref_uid, last_msgid, &parent_idx);
+
+	while ((msgid = message_id_get_next(references)) != NULL) {
+		(void)thread_msgid_get(ctx, ref_uid, msgid, &child_idx);
+		thread_link_reference(ctx, parent_idx, child_idx);
+		parent_idx = child_idx;
+		last_msgid = msgid;
+	}
+
+	/* link the last ID to us */
+	*references = last_msgid;
+}
+
+static int thread_get_mail_header(struct mail *mail, const char *name,
+				  const char **value_r)
+{
+	if (mail_get_first_header(mail, name, value_r) < 0) {
+		if (!mail->expunged)
+			return -1;
+
+		/* Message is expunged. Instead of failing the entire THREAD
+		   command, just treat the header as non-existing. */
+		*value_r = NULL;
+	}
+	return 0;
+}
+
+static bool references_are_crc32_unique(const char *references)
+{
+	const char *msgid;
+	uint32_t msgid_crc32;
+	ARRAY_TYPE(uint32_t) crc_arr;
+	const uint32_t *crc;
+	unsigned int i, count;
+
+	t_array_init(&crc_arr, 32);
+	while ((msgid = message_id_get_next(&references)) != NULL) {
+		msgid_crc32 = crc32_str_nonzero(msgid);
+		crc = array_get(&crc_arr, &count);
+		for (i = 0; i < count; i++) {
+			if (crc[i] == msgid_crc32)
+				return FALSE;
+		}
+		array_append(&crc_arr, &msgid_crc32, 1);
+	}
+	return TRUE;
+}
+
+int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail)
+{
+	const char *message_id, *in_reply_to, *references, *parent_msgid;
+	const struct mail_thread_node *parent, *old_parent;
+	struct mail_hash_header *hdr;
+	struct mail_thread_node *node;
+	uint32_t idx, parent_idx, ref_uid;
+
+	hdr = mail_hash_get_header(ctx->hash_trans);
+	i_assert(mail->uid > hdr->last_uid);
+	hdr->last_uid = mail->uid;
+	hdr->message_count++;
+
+	if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
+	    thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
+		return -1;
+
+	ref_uid = references_are_crc32_unique(references) ? mail->uid : 0;
+	thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx);
+	thread_link_references(ctx, ref_uid, &references);
+
+	if (references != NULL)
+		parent_msgid = references;
+	else {
+		/* no valid IDs in References:, use In-Reply-To: instead */
+		if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
+					   &in_reply_to) < 0)
+			return -1;
+		parent_msgid = message_id_get_next(&in_reply_to);
+	}
+	parent = parent_msgid == NULL ? NULL :
+		thread_msgid_get(ctx, ref_uid, parent_msgid, &parent_idx);
+
+	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+	old_parent = node->parent_idx == 0 ? NULL :
+		mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
+
+	if (old_parent != NULL &&
+	    (parent == NULL || old_parent->parent_idx != parent_idx)) {
+		/* conflicting parent, remove it. */
+		node->parent_idx = 0;
+		/* If this message gets expunged, we have to revert back to
+		   the original parent. */
+		node->expunge_rebuilds = TRUE;
+		mail_hash_update(ctx->hash_trans, idx);
+	}
+	if (parent != NULL)
+		thread_link_reference(ctx, parent_idx, idx);
+	return 0;
+}
+
+static bool
+mail_thread_node_lookup(struct mail_thread_update_context *ctx, uint32_t uid,
+			uint32_t *idx_r, struct mail_thread_node **node_r)
+{
+	struct msgid_search_key key;
+	const char *msgids;
+	int ret;
+
+	if (!mail_set_uid(ctx->tmp_mail, uid))
+		return FALSE;
+
+	ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, &msgids);
+	if (ret <= 0)
+		return FALSE;
+
+	key.msgid = message_id_get_next(&msgids);
+	if (key.msgid == NULL)
+		return FALSE;
+	key.msgid_crc32 = crc32_str_nonzero(key.msgid);
+
+	*node_r = mail_hash_lookup(ctx->hash_trans, &key, idx_r);
+	if (*node_r == NULL)
+		return FALSE;
+
+	if ((*node_r)->uid != ctx->tmp_mail->uid) {
+		/* duplicate Message-ID probably */
+		return FALSE;
+	}
+	return TRUE;
+}
+
+static bool
+thread_unref_msgid(struct mail_thread_update_context *ctx, uint32_t child_idx,
+		   const char *msgid, uint32_t *parent_idx_r)
+{
+	struct msgid_search_key key;
+	struct mail_thread_node *parent, *child;
+	uint32_t parent_idx;
+
+	key.msgid = msgid;
+	key.msgid_crc32 = crc32_str_nonzero(msgid);
+
+	ctx->cmp_match_count = 0;
+	ctx->cmp_last_idx = 0;
+
+	parent = mail_hash_lookup(ctx->hash_trans, &key, &parent_idx);
+	if (parent == NULL) {
+		if (ctx->cmp_match_count != 1 || ctx->failed) {
+			/* couldn't find the message-id */
+			return FALSE;
+		}
+
+		/* there's only one key with this crc32 value, so it
+		   must be what we're looking for */
+		parent_idx = ctx->cmp_last_idx;
+		parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
+	}
+	if (parent->parent_unref_rebuilds)
+		return FALSE;
+
+	child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
+	if (child->link_refcount == 0) {
+		mail_hash_transaction_set_corrupted(ctx->hash_trans,
+						    "unexpected refcount=0");
+		return FALSE;
+	}
+	child->link_refcount--;
+	if (child->link_refcount == 0) {
+		/* we don't have a root anymore */
+		child->parent_idx = 0;
+	}
+	mail_hash_update(ctx->hash_trans, child_idx);
+	*parent_idx_r = parent_idx;
+	return TRUE;
+}
+
+static bool
+thread_unref_links(struct mail_thread_update_context *ctx, uint32_t child_idx,
+		   const char *references, bool *valid_r)
+{
+	uint32_t parent_idx;
+	const char *msgid;
+
+	/* tmp_mail may be changed below, so we have to duplicate the
+	   references string */
+	references = t_strdup(references);
+	*valid_r = FALSE;
+
+	while ((msgid = message_id_get_next(&references)) != NULL) {
+		*valid_r = TRUE;
+		if (!thread_unref_msgid(ctx, child_idx, msgid, &parent_idx))
+			return FALSE;
+		child_idx = parent_idx;
+	}
+	return TRUE;
+}
+
+int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t uid)
+{
+	struct mail_hash_header *hdr;
+	struct mail_thread_node *node;
+	const char *references, *in_reply_to;
+	uint32_t idx, parent_idx;
+	bool have_refs;
+
+	if (!mail_thread_node_lookup(ctx, uid, &idx, &node))
+		return 0;
+	if (node->expunge_rebuilds)
+		return 0;
+
+	if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
+				  &references) < 0)
+		return -1;
+
+	if (!thread_unref_links(ctx, idx, references, &have_refs))
+		return 0;
+	if (!have_refs) {
+		/* no valid IDs in References:, use In-Reply-To: instead */
+		if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
+					  &in_reply_to) < 0)
+			return -1;
+		in_reply_to = message_id_get_next(&in_reply_to);
+		if (in_reply_to != NULL) {
+			if (!thread_unref_msgid(ctx, idx, in_reply_to,
+						&parent_idx))
+				return 0;
+		}
+	}
+
+	/* get the node again, the pointer may have changed */
+	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+
+	node->uid = 0;
+	node->exists = FALSE;
+	mail_hash_update(ctx->hash_trans, idx);
+
+	hdr = mail_hash_get_header(ctx->hash_trans);
+	hdr->message_count--;
+	return 1;
+}