# HG changeset patch
# User Timo Sirainen <tss@iki.fi>
# Date 1042058991 -7200
# Node ID 4f697dde0fca4294d7e99088ac7d5f2e73d6dce2
# Parent  b71883ef0247a31f114666c10f60fa0b586bdd00
THREAD=REFERENCES implementation. Doesn't crash, but I'm not sure how
correct replies it produces :)

diff -r b71883ef0247 -r 4f697dde0fca AUTHORS
--- a/AUTHORS	Wed Jan 08 22:48:04 2003 +0200
+++ b/AUTHORS	Wed Jan 08 22:49:51 2003 +0200
@@ -21,3 +21,5 @@
 Manish Singh       <yosh@gimp.org>
 Owen Taylor        <otaylor@gtk.org>
 Sebastian Wilhelmi <wilhelmi@ira.uka.de>
+
+Simon Tatham (src/lib-storage/mail-thread.c merge sorting)
diff -r b71883ef0247 -r 4f697dde0fca COPYING
--- a/COPYING	Wed Jan 08 22:48:04 2003 +0200
+++ b/COPYING	Wed Jan 08 22:49:51 2003 +0200
@@ -7,3 +7,5 @@
   - md5.c : Public Domain
   - base64.c, mkgmtime.c : BSD-like (read it)
   - hash.c, primes.c, printf-upper-bound.c, tree.c : LGPL v2
+
+src/lib-storage/mail-thread.c : merge sorting code is with MIT license.
diff -r b71883ef0247 -r 4f697dde0fca src/imap/Makefile.am
--- a/src/imap/Makefile.am	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/Makefile.am	Wed Jan 08 22:49:51 2003 +0200
@@ -47,6 +47,7 @@
 	cmd-status.c \
 	cmd-store.c \
 	cmd-subscribe.c \
+	cmd-thread.c \
 	cmd-uid.c \
 	cmd-unsubscribe.c
 
diff -r b71883ef0247 -r 4f697dde0fca src/imap/cmd-search.c
--- a/src/imap/cmd-search.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/cmd-search.c	Wed Jan 08 22:49:51 2003 +0200
@@ -42,7 +42,7 @@
 		charset = NULL;
 	}
 
-	pool = pool_alloconly_create("MailSearchArgs", 2048);
+	pool = pool_alloconly_create("mail_search_args", 2048);
 
 	sargs = mail_search_args_build(pool, args, &error);
 	if (sargs == NULL) {
@@ -50,7 +50,7 @@
 		client_send_tagline(client, t_strconcat("NO ", error, NULL));
 	} else {
 		if (client->mailbox->search(client->mailbox, charset,
-					    sargs, NULL,
+					    sargs, NULL, MAIL_THREAD_NONE,
 					    client->output, client->cmd_uid)) {
 			/* NOTE: syncing is allowed when returning UIDs */
 			if (client->cmd_uid)
diff -r b71883ef0247 -r 4f697dde0fca src/imap/cmd-sort.c
--- a/src/imap/cmd-sort.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/cmd-sort.c	Wed Jan 08 22:49:51 2003 +0200
@@ -103,11 +103,12 @@
 	if (args->type != IMAP_ARG_ATOM && args->type != IMAP_ARG_STRING) {
 		client_send_command_error(client,
 					  "Invalid charset argument.");
+		return TRUE;
 	}
 	charset = IMAP_ARG_STR(args);
 	args++;
 
-	pool = pool_alloconly_create("MailSortArgs", 2048);
+	pool = pool_alloconly_create("mail_search_args", 2048);
 
 	sargs = mail_search_args_build(pool, args, &error);
 	if (sargs == NULL) {
@@ -115,7 +116,7 @@
 		client_send_tagline(client, t_strconcat("NO ", error, NULL));
 	} else {
 		if (client->mailbox->search(client->mailbox, charset,
-					    sargs, sorting,
+					    sargs, sorting, MAIL_THREAD_NONE,
 					    client->output, client->cmd_uid)) {
 			/* NOTE: syncing is allowed when returning UIDs */
 			if (client->cmd_uid)
diff -r b71883ef0247 -r 4f697dde0fca src/imap/cmd-thread.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/imap/cmd-thread.c	Wed Jan 08 22:49:51 2003 +0200
@@ -0,0 +1,80 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+#include "common.h"
+#include "buffer.h"
+#include "commands.h"
+#include "mail-search.h"
+#include "mail-sort.h"
+
+int cmd_thread(struct client *client)
+{
+	enum mail_thread_type threading;
+	struct mail_search_arg *sargs;
+	struct imap_arg *args;
+	int args_count;
+	pool_t pool;
+	const char *error, *charset, *str;
+
+	args_count = imap_parser_read_args(client->parser, 0, 0, &args);
+	if (args_count == -2)
+		return FALSE;
+
+	if (args_count < 3) {
+		client_send_command_error(client, args_count < 0 ? NULL :
+					  "Missing or invalid arguments.");
+		return TRUE;
+	}
+
+	if (!client_verify_open_mailbox(client))
+		return TRUE;
+
+	if (args->type != IMAP_ARG_ATOM && args->type != IMAP_ARG_STRING) {
+		client_send_command_error(client,
+					  "Invalid thread algorithm argument.");
+		return TRUE;
+	}
+
+	str = IMAP_ARG_STR(args);
+	if (strcasecmp(str, "ORDEREDSUBJECT") == 0)
+		threading = MAIL_THREAD_ORDEREDSUBJECT;
+	else if (strcasecmp(str, "REFERENCES") == 0)
+		threading = MAIL_THREAD_REFERENCES;
+	else {
+		client_send_command_error(client, "Unknown thread algorithm.");
+		return TRUE;
+	}
+	args++;
+
+	/* charset */
+	if (args->type != IMAP_ARG_ATOM && args->type != IMAP_ARG_STRING) {
+		client_send_command_error(client,
+					  "Invalid charset argument.");
+		return TRUE;
+	}
+	charset = IMAP_ARG_STR(args);
+	args++;
+
+	pool = pool_alloconly_create("mail_search_args", 2048);
+
+	sargs = mail_search_args_build(pool, args, &error);
+	if (sargs == NULL) {
+		/* error in search arguments */
+		client_send_tagline(client, t_strconcat("NO ", error, NULL));
+	} else {
+		if (client->mailbox->search(client->mailbox, charset,
+					    sargs, NULL, threading,
+					    client->output, client->cmd_uid)) {
+			/* NOTE: syncing is allowed when returning UIDs */
+			if (client->cmd_uid)
+				client_sync_full(client);
+			else
+				client_sync_without_expunges(client);
+			client_send_tagline(client, "OK Search completed.");
+		} else {
+			client_send_storage_error(client);
+		}
+	}
+
+	pool_unref(pool);
+	return TRUE;
+}
diff -r b71883ef0247 -r 4f697dde0fca src/imap/cmd-uid.c
--- a/src/imap/cmd-uid.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/cmd-uid.c	Wed Jan 08 22:49:51 2003 +0200
@@ -33,6 +33,11 @@
 		else if (strcasecmp(cmd, "SORT") == 0)
 			client->cmd_func = cmd_sort;
 		break;
+	case 't':
+	case 'T':
+		if (strcasecmp(cmd, "THREAD") == 0)
+			client->cmd_func = cmd_thread;
+		break;
 	}
 
 	if (client->cmd_func != NULL) {
diff -r b71883ef0247 -r 4f697dde0fca src/imap/commands.c
--- a/src/imap/commands.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/commands.c	Wed Jan 08 22:49:51 2003 +0200
@@ -73,6 +73,10 @@
 		if (strcmp(name, "SUBSCRIBE") == 0)
 			return cmd_subscribe;
 		break;
+	case 'T':
+		if (strcmp(name, "THREAD") == 0)
+			return cmd_thread;
+		break;
 	case 'U':
 		if (strcmp(name, "UID") == 0)
 			return cmd_uid;
diff -r b71883ef0247 -r 4f697dde0fca src/imap/commands.h
--- a/src/imap/commands.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/imap/commands.h	Wed Jan 08 22:49:51 2003 +0200
@@ -37,6 +37,7 @@
 int cmd_expunge(struct client *client);
 int cmd_search(struct client *client);
 int cmd_sort(struct client *client);
+int cmd_thread(struct client *client);
 int cmd_fetch(struct client *client);
 int cmd_store(struct client *client);
 int cmd_copy(struct client *client);
diff -r b71883ef0247 -r 4f697dde0fca src/lib-imap/imap-base-subject.c
--- a/src/lib-imap/imap-base-subject.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-imap/imap-base-subject.c	Wed Jan 08 22:49:51 2003 +0200
@@ -1,5 +1,8 @@
 /* Copyright (C) 2002 Timo Sirainen */
 
+/* Implementated against draft-ietf-imapext-sort-10 and
+   draft-ietf-imapext-thread-12 */
+
 #include "lib.h"
 #include "buffer.h"
 #include "charset-utf8.h"
@@ -79,7 +82,7 @@
 	buffer_set_used_size(buf, (size_t) (dest - data)+1);
 }
 
-static void remove_subj_trailers(buffer_t *buf)
+static void remove_subj_trailers(buffer_t *buf, int *is_reply_or_forward)
 {
 	const char *data;
 	size_t orig_size, size;
@@ -93,10 +96,14 @@
 	for (size = orig_size-2; size > 0; ) {
 		if (data[size] == ' ')
 			size--;
-		else if (size >= 5 && memcmp(data + size - 5, "(fwd)", 5) == 0)
+		else if (size >= 5 &&
+			 memcmp(data + size - 5, "(fwd)", 5) == 0) {
+			if (is_reply_or_forward != NULL)
+				*is_reply_or_forward = TRUE;
 			size -= 5;
-		else
+		} else {
 			break;
+		}
 	}
 
 	if (size != orig_size-2) {
@@ -126,7 +133,7 @@
 	return TRUE;
 }
 
-static int remove_subj_leader(buffer_t *buf)
+static int remove_subj_leader(buffer_t *buf, int *is_reply_or_forward)
 {
 	const char *data, *orig_data;
 	int ret = FALSE;
@@ -173,6 +180,8 @@
 	data++;
 	buffer_set_start_pos(buf, buffer_get_start_pos(buf) +
 			     (size_t) (data - orig_data));
+	if (is_reply_or_forward != NULL)
+		*is_reply_or_forward = TRUE;
 	return TRUE;
 }
 
@@ -190,7 +199,7 @@
 	return FALSE;
 }
 
-static int remove_subj_fwd_hdr(buffer_t *buf)
+static int remove_subj_fwd_hdr(buffer_t *buf, int *is_reply_or_forward)
 {
 	const char *data;
 	size_t size;
@@ -206,6 +215,9 @@
 	if (data[size-2] != ']')
 		return FALSE;
 
+	if (is_reply_or_forward != NULL)
+		*is_reply_or_forward = TRUE;
+
 	buffer_set_used_size(buf, size-2);
 	buffer_append_c(buf, '\0');
 
@@ -213,12 +225,16 @@
 	return TRUE;
 }
 
-const char *imap_get_base_subject_cased(pool_t pool, const char *subject)
+const char *imap_get_base_subject_cased(pool_t pool, const char *subject,
+					int *is_reply_or_forward)
 {
 	buffer_t *buf;
 	size_t subject_len;
 	int found;
 
+	if (is_reply_or_forward != NULL)
+		*is_reply_or_forward = FALSE;
+
 	subject_len = strlen(subject);
 	buf = buffer_create_dynamic(pool, subject_len, (size_t)-1);
 
@@ -235,12 +251,12 @@
 		/* (2) Remove all trailing text of the subject that matches
 		   the subj-trailer ABNF, repeat until no more matches are
 		   possible. */
-		remove_subj_trailers(buf);
+		remove_subj_trailers(buf, is_reply_or_forward);
 
 		do {
 			/* (3) Remove all prefix text of the subject that
 			   matches the subj-leader ABNF. */
-			found = remove_subj_leader(buf);
+			found = remove_subj_leader(buf, is_reply_or_forward);
 
 			/* (4) If there is prefix text of the subject that
 			   matches the subj-blob ABNF, and removing that prefix
@@ -254,7 +270,7 @@
 		/* (6) If the resulting text begins with the subj-fwd-hdr ABNF
 		   and ends with the subj-fwd-trl ABNF, remove the
 		   subj-fwd-hdr and subj-fwd-trl and repeat from step (2). */
-	} while (remove_subj_fwd_hdr(buf));
+	} while (remove_subj_fwd_hdr(buf, is_reply_or_forward));
 
 	/* (7) The resulting text is the "base subject" used in the
 	   SORT. */
diff -r b71883ef0247 -r 4f697dde0fca src/lib-imap/imap-base-subject.h
--- a/src/lib-imap/imap-base-subject.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-imap/imap-base-subject.h	Wed Jan 08 22:49:51 2003 +0200
@@ -3,7 +3,11 @@
 
 /* Returns the base subject of the given string, according to
    draft-ietf-imapext-sort-10. String is returned so that it's suitable for
-   strcmp() comparing with another base subject. */
-const char *imap_get_base_subject_cased(pool_t pool, const char *subject);
+   strcmp() comparing with another base subject.
+
+   is_reply_or_forward is set to TRUE if message looks like reply or forward,
+   according to draft-ietf-imapext-thread-12 rules. */
+const char *imap_get_base_subject_cased(pool_t pool, const char *subject,
+					int *is_reply_or_forward);
 
 #endif
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/Makefile.am
--- a/src/lib-storage/Makefile.am	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/Makefile.am	Wed Jan 08 22:49:51 2003 +0200
@@ -4,14 +4,17 @@
 
 INCLUDES = \
 	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-mail \
 	-I$(top_srcdir)/src/lib-imap
 
 libstorage_a_SOURCES = \
 	mail-search.c \
 	mail-sort.c \
-	mail-storage.c
+	mail-storage.c \
+	mail-thread.c
 
 noinst_HEADERS = \
 	mail-search.h \
 	mail-sort.h \
-	mail-storage.h
+	mail-storage.h \
+	mail-thread.h
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/index/index-search.c
--- a/src/lib-storage/index/index-search.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/index/index-search.c	Wed Jan 08 22:49:51 2003 +0200
@@ -16,6 +16,7 @@
 #include "mail-modifylog.h"
 #include "mail-custom-flags.h"
 #include "mail-search.h"
+#include "mail-thread.h"
 
 #include <stdlib.h>
 #include <ctype.h>
@@ -33,18 +34,25 @@
 	struct index_mailbox *ibox;
 	struct mail_index_record *rec;
 	unsigned int client_seq;
-	int cached;
 	const char *charset;
 	const char *error;
+
+	unsigned int cached:1;
+	unsigned int threading:1;
+
+	/* for threading: */
+	const char *message_id, *in_reply_to, *references;
 };
 
 struct search_header_context {
         struct search_index_context *index_context;
 	struct mail_search_arg *args;
-	int custom_header;
 
 	const unsigned char *name, *value;
 	size_t name_len, value_len;
+
+	unsigned int custom_header:1;
+	unsigned int threading:1;
 };
 
 struct search_body_context {
@@ -532,7 +540,20 @@
 {
 	struct search_header_context *ctx = context;
 
-	if ((name_len > 0 && ctx->custom_header) ||
+	if (ctx->threading) {
+		struct search_index_context *ictx = ctx->index_context;
+
+		if (name_len == 10 && memcasecmp(name, "Message-ID", 10) == 0)
+			ictx->message_id = t_strndup(value, value_len);
+		else if (name_len == 11 &&
+			 memcasecmp(name, "In-Reply-To", 11) == 0)
+			ictx->in_reply_to = t_strndup(value, value_len);
+		else if (name_len == 10 &&
+			 memcasecmp(name, "References", 10) == 0)
+			ictx->references = t_strndup(value, value_len);
+	}
+
+	if ((ctx->custom_header && name_len > 0) ||
 	    (name_len == 4 && memcasecmp(name, "Date", 4) == 0) ||
 	    (name_len == 4 && memcasecmp(name, "From", 4) == 0) ||
 	    (name_len == 2 && memcasecmp(name, "To", 2) == 0) ||
@@ -581,6 +602,8 @@
 
 	/* first check what we need to use */
 	mail_search_args_analyze(args, &have_headers, &have_body, &have_text);
+	if (ctx->threading)
+		have_headers = TRUE;
 	if (!have_headers && !have_body && !have_text)
 		return TRUE;
 
@@ -594,6 +617,7 @@
 		hdr_ctx.index_context = ctx;
 		hdr_ctx.custom_header = TRUE;
 		hdr_ctx.args = args;
+		hdr_ctx.threading = ctx->threading;
 
 		message_parse_header(NULL, input, NULL,
 				     search_header, &hdr_ctx);
@@ -788,6 +812,7 @@
 static int search_messages(struct index_mailbox *ibox, const char *charset,
 			   struct mail_search_arg *args,
 			   struct mail_sort_context *sort_ctx,
+			   struct mail_thread_context *thread_ctx,
 			   struct ostream *output, int uid_result)
 {
 	struct search_index_context ctx;
@@ -818,6 +843,7 @@
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.ibox = ibox;
 	ctx.charset = charset;
+	ctx.threading = thread_ctx != NULL;
 
 	for (; rec != NULL && rec->uid <= last_uid; client_seq++) {
 		while (expunges->uid1 != 0 && expunges->uid1 < rec->uid) {
@@ -833,18 +859,22 @@
 		ctx.client_seq = client_seq;
 		ctx.cached = FALSE;
 
+		ctx.message_id = ctx.in_reply_to = ctx.references = NULL;
+
 		mail_search_args_reset(args);
 
 		t_push();
+
 		mail_search_args_foreach(args, search_index_arg, &ctx);
 		mail_search_args_foreach(args, search_cached_arg, &ctx);
 		mail_search_args_foreach(args, search_envelope_arg, &ctx);
 		failed = !search_arg_match_text(args, &ctx);
                 imap_msgcache_close(ibox->cache);
-		t_pop();
 
-		if (ctx.error != NULL)
+		if (ctx.error != NULL) {
+			t_pop();
 			break;
+		}
 
 		if (!failed) {
 			found = TRUE;
@@ -856,19 +886,23 @@
 			}
 
 			if (found) {
-				if (sort_ctx == NULL) {
-					t_push();
+				if (sort_ctx != NULL)
+					mail_sort_input(sort_ctx, rec->uid);
+				else if (thread_ctx != NULL) {
+					mail_thread_input(thread_ctx, rec->uid,
+							  ctx.message_id,
+							  ctx.in_reply_to,
+							  ctx.references);
+				} else {
 					o_stream_send(output, " ", 1);
 
 					str = dec2str(uid_result ? rec->uid :
 						      client_seq);
 					o_stream_send_str(output, str);
-					t_pop();
-				} else {
-					mail_sort_input(sort_ctx, rec->uid);
 				}
 			}
 		}
+		t_pop();
 
 		rec = ibox->index->next(ibox->index, rec);
 	}
@@ -884,33 +918,49 @@
 int index_storage_search(struct mailbox *box, const char *charset,
 			 struct mail_search_arg *args,
 			 enum mail_sort_type *sorting,
+                         enum mail_thread_type threading,
 			 struct ostream *output, int uid_result)
 {
 	struct index_mailbox *ibox = (struct index_mailbox *) box;
 	struct mail_sort_context *sort_ctx;
+	struct mail_thread_context *thread_ctx;
 	struct index_sort_context index_sort_ctx;
 	int failed;
 
 	if (!index_storage_sync_and_lock(ibox, TRUE, MAIL_LOCK_SHARED))
 		return FALSE;
 
-	if (sorting == NULL) {
-		sort_ctx = NULL;
-		o_stream_send_str(output, "* SEARCH");
-	} else {
+	if (sorting != NULL) {
 		memset(&index_sort_ctx, 0, sizeof(index_sort_ctx));
 		index_sort_ctx.ibox = ibox;
 		index_sort_ctx.output = output;
 
+		thread_ctx = NULL;
 		sort_ctx = mail_sort_init(sort_unsorted, sorting,
-					  index_sort_funcs, &index_sort_ctx);
+					  &index_sort_callbacks,
+					  &index_sort_ctx);
 		o_stream_send_str(output, "* SORT");
+	} else if (threading != MAIL_THREAD_NONE) {
+		memset(&index_sort_ctx, 0, sizeof(index_sort_ctx));
+		index_sort_ctx.ibox = ibox;
+
+		sort_ctx = NULL;
+		thread_ctx = mail_thread_init(threading, output,
+					      &index_sort_callbacks,
+					      &index_sort_ctx);
+		o_stream_send_str(output, "* THREAD");
+	} else {
+		sort_ctx = NULL;
+		thread_ctx = NULL;
+		o_stream_send_str(output, "* SEARCH");
 	}
 
-	failed = !search_messages(ibox, charset, args, sort_ctx,
+	failed = !search_messages(ibox, charset, args, sort_ctx, thread_ctx,
 				  output, uid_result);
 	if (sort_ctx != NULL)
 		mail_sort_deinit(sort_ctx);
+	if (thread_ctx != NULL)
+		mail_thread_finish(thread_ctx);
 
 	o_stream_send(output, "\r\n", 2);
 
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/index/index-sort.c
--- a/src/lib-storage/index/index-sort.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/index/index-sort.c	Wed Jan 08 22:49:51 2003 +0200
@@ -12,9 +12,12 @@
 static struct imap_message_cache *
 search_open_cache(struct index_sort_context *ctx, unsigned int uid)
 {
+	i_assert(uid != 0);
+
 	if (ctx->last_uid != uid) {
 		ctx->cached = FALSE;
 		ctx->last_uid = uid;
+
 		ctx->rec = ctx->ibox->index->lookup_uid_range(ctx->ibox->index,
 							      uid, uid, NULL);
 		if (ctx->rec == NULL) {
@@ -156,6 +159,7 @@
 	struct index_sort_context *ctx = context;
 	size_t i;
 
+	/* FIXME: works only with UIDs! */
 	for (i = 0; i < count; i++) {
 		t_push();
 		o_stream_send(ctx->output, " ", 1);
@@ -164,7 +168,7 @@
 	}
 }
 
-struct mail_sort_funcs index_sort_funcs = {
+struct mail_sort_callbacks index_sort_callbacks = {
 	_input_time,
 	_input_uofft,
 	_input_mailbox,
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/index/index-sort.h
--- a/src/lib-storage/index/index-sort.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/index/index-sort.h	Wed Jan 08 22:49:51 2003 +0200
@@ -14,6 +14,6 @@
 	unsigned int cached:1;
 };
 
-extern struct mail_sort_funcs index_sort_funcs;
+extern struct mail_sort_callbacks index_sort_callbacks;
 
 #endif
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/index/index-storage.h
--- a/src/lib-storage/index/index-storage.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/index/index-storage.h	Wed Jan 08 22:49:51 2003 +0200
@@ -92,6 +92,7 @@
 int index_storage_search(struct mailbox *box, const char *charset,
 			 struct mail_search_arg *args,
 			 enum mail_sort_type *sorting,
+                         enum mail_thread_type threading,
 			 struct ostream *output, int uid_result);
 
 #endif
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/mail-sort.c
--- a/src/lib-storage/mail-sort.c	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/mail-sort.c	Wed Jan 08 22:49:51 2003 +0200
@@ -14,7 +14,7 @@
 	enum mail_sort_type output[MAX_SORT_PROGRAM_SIZE];
 	enum mail_sort_type common_mask;
 
-	struct mail_sort_funcs funcs;
+	const struct mail_sort_callbacks *callbacks;
 	void *func_context;
 
 	buffer_t *sort_buffer;
@@ -76,7 +76,7 @@
 
 struct mail_sort_context *
 mail_sort_init(const enum mail_sort_type *input, enum mail_sort_type *output,
-	       struct mail_sort_funcs funcs, void *context)
+	       const struct mail_sort_callbacks *callbacks, void *context)
 {
 	struct mail_sort_context *ctx;
 	enum mail_sort_type norm_input[MAX_SORT_PROGRAM_SIZE];
@@ -110,7 +110,7 @@
 						 (size_t)-1);
 
 	ctx->temp_pool = pool_alloconly_create("Sort", 8192);
-	ctx->funcs = funcs;
+	ctx->callbacks = callbacks;
 	ctx->func_context = context;
 	return ctx;
 }
@@ -151,8 +151,8 @@
 		return 1;
 
 	p_clear(pool);
-	ret = strcmp(imap_get_base_subject_cased(pool, s1),
-		     imap_get_base_subject_cased(pool, s2));
+	ret = strcmp(imap_get_base_subject_cased(pool, s1, NULL),
+		     imap_get_base_subject_cased(pool, s2, NULL));
 	return ret;
 }
 
@@ -165,8 +165,8 @@
 	int changed = FALSE;
 
 	if (ctx->common_mask & MAIL_SORT_ARRIVAL) {
-		t = ctx->funcs.input_time(MAIL_SORT_ARRIVAL, id,
-					  ctx->func_context);
+		t = ctx->callbacks->input_time(MAIL_SORT_ARRIVAL, id,
+					       ctx->func_context);
 		if (t != ctx->last_arrival) {
 			ctx->last_arrival = t;
 			changed = TRUE;
@@ -174,8 +174,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_CC) {
-		str = ctx->funcs.input_str(MAIL_SORT_CC, id,
-					   ctx->func_context);
+		str = ctx->callbacks->input_str(MAIL_SORT_CC, id,
+						ctx->func_context);
 		if (addr_strcmp(str, ctx->last_cc) != 0) {
 			i_free(ctx->last_cc);
 			ctx->last_cc = i_strdup(str);
@@ -184,8 +184,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_DATE) {
-		t = ctx->funcs.input_time(MAIL_SORT_DATE, id,
-					  ctx->func_context);
+		t = ctx->callbacks->input_time(MAIL_SORT_DATE, id,
+					       ctx->func_context);
 		if (t != ctx->last_date) {
 			ctx->last_date = t;
 			changed = TRUE;
@@ -193,8 +193,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_FROM) {
-		str = ctx->funcs.input_str(MAIL_SORT_FROM, id,
-					   ctx->func_context);
+		str = ctx->callbacks->input_str(MAIL_SORT_FROM, id,
+						ctx->func_context);
 		if (addr_strcmp(str, ctx->last_from) != 0) {
 			i_free(ctx->last_from);
 			ctx->last_from = i_strdup(str);
@@ -203,8 +203,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_SIZE) {
-		size = ctx->funcs.input_time(MAIL_SORT_SIZE, id,
-					     ctx->func_context);
+		size = ctx->callbacks->input_time(MAIL_SORT_SIZE, id,
+						  ctx->func_context);
 		if (size != ctx->last_size) {
 			ctx->last_size = size;
 			changed = TRUE;
@@ -212,8 +212,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_SUBJECT) {
-		str = ctx->funcs.input_str(MAIL_SORT_SUBJECT, id,
-					   ctx->func_context);
+		str = ctx->callbacks->input_str(MAIL_SORT_SUBJECT, id,
+						ctx->func_context);
 		if (subject_cmp(ctx->temp_pool, str, ctx->last_subject) != 0) {
 			i_free(ctx->last_subject);
 			ctx->last_subject = i_strdup(str);
@@ -222,8 +222,8 @@
 	}
 
 	if (ctx->common_mask & MAIL_SORT_TO) {
-		str = ctx->funcs.input_str(MAIL_SORT_TO, id,
-					   ctx->func_context);
+		str = ctx->callbacks->input_str(MAIL_SORT_TO, id,
+						ctx->func_context);
 		if (addr_strcmp(str, ctx->last_to) != 0) {
 			i_free(ctx->last_to);
 			ctx->last_to = i_strdup(str);
@@ -249,11 +249,15 @@
 {
 	const unsigned int *i1 = p1;
 	const unsigned int *i2 = p2;
-	enum mail_sort_type *output = mail_sort_qsort_context->output;
-        struct mail_sort_funcs *funcs = &mail_sort_qsort_context->funcs;
-	void *ctx = mail_sort_qsort_context->func_context;
+	enum mail_sort_type *output;
+        const struct mail_sort_callbacks *cb;
+	void *ctx;
 	int ret, reverse = FALSE;
 
+	output = mail_sort_qsort_context->output;
+	cb = mail_sort_qsort_context->callbacks;
+	ctx = mail_sort_qsort_context->func_context;
+
 	t_push();
 
 	ret = 0;
@@ -268,16 +272,16 @@
 		case MAIL_SORT_DATE: {
 			time_t r1, r2;
 
-			r1 = funcs->input_time(*output, *i1, ctx);
-			r2 = funcs->input_time(*output, *i2, ctx);
+			r1 = cb->input_time(*output, *i1, ctx);
+			r2 = cb->input_time(*output, *i2, ctx);
 			ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0;
 			break;
 		}
 		case MAIL_SORT_SIZE: {
 			uoff_t r1, r2;
 
-			r1 = funcs->input_uofft(*output, *i1, ctx);
-			r2 = funcs->input_uofft(*output, *i2, ctx);
+			r1 = cb->input_uofft(*output, *i1, ctx);
+			r2 = cb->input_uofft(*output, *i2, ctx);
 			ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0;
 			break;
 		}
@@ -286,15 +290,15 @@
 		case MAIL_SORT_TO: {
 			const char *a1, *a2;
 
-			a1 = funcs->input_mailbox(*output, *i1, ctx);
-			a2 = funcs->input_mailbox(*output, *i2, ctx);
+			a1 = cb->input_mailbox(*output, *i1, ctx);
+			a2 = cb->input_mailbox(*output, *i2, ctx);
 			ret = addr_strcmp(a1, a2);
 			break;
 		}
 		case MAIL_SORT_SUBJECT:
 			ret = subject_cmp(mail_sort_qsort_context->temp_pool,
-					  funcs->input_str(*output, *i1, ctx),
-					  funcs->input_str(*output, *i2, ctx));
+					  cb->input_str(*output, *i1, ctx),
+					  cb->input_str(*output, *i2, ctx));
 			break;
 		default:
 			i_unreached();
@@ -310,7 +314,7 @@
 		reverse = FALSE;
 	}
 
-	funcs->input_reset(ctx);
+	cb->input_reset(ctx);
 
 	t_pop();
 
@@ -328,6 +332,6 @@
 	count = buffer_get_used_size(ctx->sort_buffer) / sizeof(unsigned int);
 	qsort(arr, count, sizeof(unsigned int), mail_sort_qsort_func);
 
-	ctx->funcs.output(arr, count, ctx->func_context);
+	ctx->callbacks->output(arr, count, ctx->func_context);
 	buffer_set_used_size(ctx->sort_buffer, 0);
 }
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/mail-sort.h
--- a/src/lib-storage/mail-sort.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/mail-sort.h	Wed Jan 08 22:49:51 2003 +0200
@@ -6,7 +6,7 @@
 /* Maximum size for sort program, 2x for reverse + END */
 #define MAX_SORT_PROGRAM_SIZE (2*7 + 1)
 
-struct mail_sort_funcs {
+struct mail_sort_callbacks {
 	/* arrival, date */
 	time_t (*input_time)(enum mail_sort_type type, unsigned int id,
 			     void *context);
@@ -33,11 +33,11 @@
    is known, the less memory is used. */
 struct mail_sort_context *
 mail_sort_init(const enum mail_sort_type *input, enum mail_sort_type *output,
-	       struct mail_sort_funcs funcs, void *context);
+	       const struct mail_sort_callbacks *callbacks, void *context);
 void mail_sort_deinit(struct mail_sort_context *ctx);
 
 /* id is either UID or sequence number of message, whichever is preferred
-   in MailSortFuncs parameters. */
+   in mail_sort_callbacks parameters. */
 void mail_sort_input(struct mail_sort_context *ctx, unsigned int id);
 
 #endif
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/mail-storage.h
--- a/src/lib-storage/mail-storage.h	Wed Jan 08 22:48:04 2003 +0200
+++ b/src/lib-storage/mail-storage.h	Wed Jan 08 22:49:51 2003 +0200
@@ -51,6 +51,12 @@
 	MAIL_SORT_END		= 0x0000 /* ends sort program */
 };
 
+enum mail_thread_type {
+	MAIL_THREAD_NONE,
+	MAIL_THREAD_ORDEREDSUBJECT,
+	MAIL_THREAD_REFERENCES
+};
+
 struct mail_storage;
 struct mail_storage_callbacks;
 struct mailbox_status;
@@ -191,6 +197,7 @@
 	int (*search)(struct mailbox *box, const char *charset,
 		      struct mail_search_arg *args,
 		      enum mail_sort_type *sorting,
+		      enum mail_thread_type threading,
 		      struct ostream *output, int uid_result);
 
 	/* Save a new mail into mailbox. timezone_offset specifies the
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/mail-thread.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/mail-thread.c	Wed Jan 08 22:49:51 2003 +0200
@@ -0,0 +1,833 @@
+/* Copyright (C) 2002 Timo Sirainen */
+
+/*
+ * Merge sort code in sort_child_nodes() is copyright 2001 Simon Tatham.
+ * 
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ * 
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Implementation of draft-ietf-imapext-thread-12 threading algorithm */
+
+#include "lib.h"
+#include "hash.h"
+#include "ostream.h"
+#include "str.h"
+#include "message-tokenize.h"
+#include "imap-base-subject.h"
+#include "mail-thread.h"
+
+#include <stdlib.h>
+
+/* how much memory to allocate initially. these are very rough
+   approximations. */
+#define APPROX_MSG_COUNT 128
+#define APPROX_MSGID_SIZE 45
+
+/* Try to buffer this much data before sending it to output stream. */
+#define OUTPUT_BUF_SIZE 2048
+
+#define NODE_IS_DUMMY(node) ((node)->id == 0 || (node)->id == UINT_MAX)
+
+struct node {
+	struct node *parent, *first_child, *next;
+
+	char *msgid;
+	unsigned int id;
+};
+
+struct root_data {
+	struct node *node;
+
+	time_t sent_date;
+	unsigned int sort_id;
+
+	const char *base_subject;
+	unsigned int reply:1;
+};
+
+struct mail_thread_context {
+	pool_t pool;
+	pool_t str_pool; /* for node->msgid and root_data->base_subject */
+
+	struct hash_table *msgid_hash;
+	struct hash_table *subject_hash;
+
+	struct node *root_nodes;
+	struct root_data *root_data; /* [root_count] */
+	size_t root_count;
+
+	const struct mail_sort_callbacks *callbacks;
+	void *callback_context;
+
+        struct ostream *output;
+};
+
+struct mail_thread_context *
+mail_thread_init(enum mail_thread_type type, struct ostream *output,
+		 const struct mail_sort_callbacks *callbacks,
+		 void *callback_context)
+{
+	struct mail_thread_context *ctx;
+	pool_t pool;
+
+	pool = pool_alloconly_create("mail_thread_context",
+				     sizeof(struct node) * APPROX_MSG_COUNT);
+
+	ctx = p_new(pool, struct mail_thread_context, 1);
+	ctx->pool = pool;
+	ctx->str_pool =
+		pool_alloconly_create("mail_thread_context strings",
+				      sizeof(struct node) *
+				      APPROX_MSG_COUNT * APPROX_MSGID_SIZE);
+	ctx->msgid_hash = hash_create(default_pool,
+				      APPROX_MSGID_SIZE*2, str_hash,
+				      (HashCompareFunc)strcmp);
+	ctx->callbacks = callbacks;
+	ctx->callback_context = callback_context;
+	ctx->output = output;
+	return ctx;
+}
+
+static void mail_thread_deinit(struct mail_thread_context *ctx)
+{
+	if (ctx->msgid_hash != NULL)
+		hash_destroy(ctx->msgid_hash);
+	if (ctx->subject_hash != NULL)
+		hash_destroy(ctx->subject_hash);
+	pool_unref(ctx->str_pool);
+	pool_unref(ctx->pool);
+}
+
+static void add_root(struct mail_thread_context *ctx, struct node *node)
+{
+	i_assert(node->next == NULL);
+
+	node->next = ctx->root_nodes;
+	ctx->root_nodes = node;
+}
+
+static struct node *create_node(struct mail_thread_context *ctx,
+				const char *msgid)
+{
+	struct node *node;
+
+	node = p_new(ctx->pool, struct node, 1);
+	node->msgid = p_strdup(ctx->str_pool, msgid);
+
+	hash_insert(ctx->msgid_hash, node->msgid, node);
+	return node;
+}
+
+static struct node *create_invalid_node(struct mail_thread_context *ctx,
+					unsigned int id)
+{
+	struct node *node;
+
+	node = p_new(ctx->pool, struct node, 1);
+	node->id = id;
+	return node;
+}
+
+static void update_message_id(struct mail_thread_context *ctx,
+			      const char *msgid, unsigned int id)
+{
+	struct node *node;
+
+	if (msgid == NULL) {
+		add_root(ctx, create_invalid_node(ctx, id));
+		return;
+	}
+
+	node = hash_lookup(ctx->msgid_hash, msgid);
+	if (node == NULL) {
+		/* first time we see this message */
+		node = create_node(ctx, msgid);
+		node->id = id;
+		return;
+	}
+
+	if (node->id == 0) {
+		/* seen before in references */
+		node->id = id;
+	} else {
+		/* duplicate -> invalidate all of them.
+		   the message-id stays and acts like a dummy node. */
+		if (node->id != UINT_MAX) {
+			add_root(ctx, create_invalid_node(ctx, node->id));
+			node->id = UINT_MAX;
+		}
+
+		add_root(ctx, create_invalid_node(ctx, id));
+	}
+}
+
+static int get_untokenized_msgid(const char **msgid_p, string_t *msgid)
+{
+	static const enum message_token stop_tokens[] = { '>', TOKEN_LAST };
+	struct message_tokenizer *tok;
+	int valid_end;
+
+	tok = message_tokenize_init(*msgid_p, (size_t)-1, NULL, NULL);
+	message_tokenize_dot_token(tok, FALSE); /* just a minor speedup */
+
+	message_tokenize_get_string(tok, msgid, NULL, stop_tokens);
+	valid_end = message_tokenize_get(tok) == '>';
+
+	*msgid_p += message_tokenize_get_parse_position(tok);
+	message_tokenize_deinit(tok);
+
+	if (valid_end) {
+		if (strchr(str_c(msgid), '@') != NULL) {
+			/* <xx@xx> - valid message ID found */
+			return TRUE;
+		}
+	}
+
+	return FALSE;
+}
+
+static const char *get_msgid(const char **msgid_p)
+{
+	const char *msgid = *msgid_p;
+	const char *p;
+	string_t *str = NULL;
+	int found_at;
+
+	if (*msgid_p == NULL)
+		return NULL;
+
+	for (;;) {
+		/* skip until '<' */
+		while (*msgid != '<') {
+			if (*msgid == '\0') {
+				*msgid_p = msgid;
+				return NULL;
+			}
+			msgid++;
+		}
+		msgid++;
+
+		/* check it through quickly to see if it's already normalized */
+		p = msgid; found_at = FALSE;
+		for (;; p++) {
+			if ((unsigned char)*p >= '0') /* matches most */
+				continue;
+
+			if (*p == '@')
+				found_at = TRUE;
+			if (*p == '>' || *p == '"' || *p == '(' ||
+			    *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
+				break;
+
+			if (*p == '\0') {
+				*msgid_p = p;
+				return NULL;
+			}
+		}
+
+		if (*p == '>') {
+			*msgid_p = p+1;
+			if (found_at)
+				return t_strdup_until(msgid, p-1);
+		} else {
+			/* ok, do it the slow way */
+			*msgid_p = msgid;
+
+			if (str == NULL) {
+				/* allocate only once, so we don't leak
+				   with multiple invalid message IDs */
+				str = t_str_new(256);
+			}
+			if (get_untokenized_msgid(msgid_p, str))
+				return str_c(str);
+		}
+
+		/* invalid message id, see if there's another valid one */
+		msgid = *msgid_p;
+	}
+}
+
+static void unlink_child(struct node *child)
+{
+	struct node **node;
+
+        node = &child->parent->first_child;
+	for (; *node != NULL; node = &(*node)->next) {
+		if (*node == child) {
+			*node = child->next;
+			break;
+		}
+	}
+
+	child->parent = NULL;
+}
+
+static int find_child(struct node *node, struct node *child)
+{
+	do {
+		if (node == child)
+			return TRUE;
+
+		if (node->first_child != NULL) {
+			if (find_child(node->first_child, child))
+				return TRUE;
+		}
+
+		node = node->next;
+	} while (node != NULL);
+
+	return FALSE;
+}
+
+static void link_message(struct mail_thread_context *ctx,
+			 const char *parent_msgid, const char *child_msgid,
+			 int replace)
+{
+	struct node *parent, *child, **node;
+
+	child = hash_lookup(ctx->msgid_hash, child_msgid);
+	if (child == NULL)
+		child = create_node(ctx, child_msgid);
+
+	if (child->parent != NULL && !replace) {
+		/* already got a parent, don't want to replace it */
+		return;
+	}
+
+	parent = hash_lookup(ctx->msgid_hash, parent_msgid);
+	if (parent == NULL)
+		parent = create_node(ctx, parent_msgid);
+
+	if (child->parent == parent) {
+		/* already have this parent, ignore */
+		return;
+	}
+
+	if (find_child(child, parent)) {
+		/* this would create a loop, not allowed */
+		return;
+	}
+
+	if (child->parent != NULL)
+		unlink_child(child);
+
+	/* link them */
+	child->parent = parent;
+
+	node = &parent->first_child;
+	while (*node != NULL)
+		node = &(*node)->next;
+	*node = child;
+}
+
+static int link_references(struct mail_thread_context *ctx,
+			   const char *msgid, const char *references)
+{
+	const char *parent_id, *child_id;
+
+	parent_id = get_msgid(&references);
+	if (parent_id == NULL)
+		return FALSE;
+
+	while ((child_id = get_msgid(&references)) != NULL) {
+		link_message(ctx, parent_id, child_id, FALSE);
+		parent_id = child_id;
+	}
+
+	if (msgid != NULL) {
+		/* link the last message to us */
+		link_message(ctx, msgid, parent_id, TRUE);
+	}
+
+	return TRUE;
+}
+
+void mail_thread_input(struct mail_thread_context *ctx, unsigned int id,
+		       const char *message_id, const char *in_reply_to,
+		       const char *references)
+{
+	/* (1) link message ids */
+	const char *msgid, *refid;
+
+	i_assert(id > 0 && id < UINT_MAX);
+
+	/* get our message ID */
+	msgid = get_msgid(&message_id);
+	update_message_id(ctx, msgid, id);
+
+	/* link references */
+	if (!link_references(ctx, msgid, references) && msgid != NULL) {
+		refid = get_msgid(&in_reply_to);
+		if (refid != NULL)
+			link_message(ctx, msgid, refid, TRUE);
+		else {
+			/* no references, make sure it's not linked */
+			struct node *node;
+
+			node = hash_lookup(ctx->msgid_hash, msgid);
+			if (node != NULL)
+				unlink_child(node);
+		}
+	}
+}
+
+static struct node *find_last_child(struct node *node)
+{
+	while (node->next != NULL)
+		node = node->next;
+
+	return node;
+}
+
+static struct node **promote_children(struct node **parent)
+{
+	struct node *new_parent, *old_parent, *child;
+
+	old_parent = *parent;
+	new_parent = old_parent->parent;
+
+	child = old_parent->first_child;
+	*parent = child;
+
+	for (;;) {
+		child->parent = new_parent;
+		if (child->next == NULL)
+			break;
+		child = child->next;
+	}
+
+	child->next = old_parent->next;
+	return &child->next;
+}
+
+static void prune_dummy_messages(struct node **node_p)
+{
+	struct node **a;
+
+	a = node_p;
+	while (*node_p != NULL) {
+		if ((*node_p)->first_child != NULL)
+			prune_dummy_messages(&(*node_p)->first_child);
+
+		if (NODE_IS_DUMMY(*node_p)) {
+			if ((*node_p)->first_child == NULL) {
+				/* no children -> delete */
+				*node_p = (*node_p)->next;
+				continue;
+			} else if ((*node_p)->parent != NULL ||
+				   (*node_p)->first_child->next == NULL) {
+				/* promote children to our level,
+				   deleting the dummy node */
+				node_p = promote_children(node_p);
+				continue;
+			}
+		}
+
+                node_p = &(*node_p)->next;
+	}
+}
+
+static int node_cmp(struct mail_thread_context *ctx,
+		    struct node *a, struct node *b)
+{
+	time_t date_a, date_b;
+
+	t_push();
+	date_a = ctx->callbacks->input_time(MAIL_SORT_DATE,
+					    a->id, ctx->callback_context);
+	date_b = ctx->callbacks->input_time(MAIL_SORT_DATE,
+					    b->id, ctx->callback_context);
+	ctx->callbacks->input_reset(ctx->callback_context);
+	t_pop();
+
+	if (date_a == date_b || date_a == 0 || date_b == 0)
+		return a->id < b->id ? -1 : 1;
+	else
+		return date_a < date_b ? -1 : 1;
+}
+
+static struct node *
+sort_nodes(struct mail_thread_context *ctx, struct node *list)
+{
+	struct node *p, *q, *e, *tail;
+	size_t insize, nmerges, psize, qsize, i;
+
+	i_assert(list != NULL);
+
+	insize = 1;
+
+	for (;;) {
+		p = list;
+		list = NULL;
+		tail = NULL;
+
+		nmerges = 0;  /* count number of merges we do in this pass */
+		while (p != 0) {
+			nmerges++;  /* there exists a merge to be done */
+
+			/* step `insize' places along from p */
+			q = p;
+			psize = 0;
+			for (i = 0; i < insize; i++) {
+				psize++;
+				q = q->next;
+				if (q == NULL) break;
+			}
+
+			/* if q hasn't fallen off end, we have two lists to
+			   merge */
+			qsize = insize;
+
+			/* now we have two lists; merge them */
+			while (psize > 0 || (qsize > 0 && q != NULL)) {
+				/* decide whether next element of merge comes
+				   from p or q */
+				if (psize == 0) {
+					/* p is empty; e must come from q. */
+					e = q; q = q->next; qsize--;
+				} else if (qsize == 0 || !q) {
+					/* q is empty; e must come from p. */
+					e = p; p = p->next; psize--;
+				} else if (node_cmp(ctx, p, q) <= 0) {
+					/* First element of p is lower
+					   (or same); e must come from p. */
+					e = p; p = p->next; psize--;
+				} else {
+					/* First element of q is lower;
+					   e must come from q. */
+					e = q; q = q->next; qsize--;
+				}
+
+				/* add the next element to the merged list */
+				if (tail)
+					tail->next = e;
+				else
+					list = e;
+				tail = e;
+			}
+
+			/* now p has stepped `insize' places along,
+			   and q has too */
+			p = q;
+		}
+		tail->next = NULL;
+
+		/* If we have done only one merge, we're finished. */
+		if (nmerges <= 1) {
+                        /* allow for nmerges == 0, the empty list case */
+			return list;
+		}
+
+		/* Otherwise repeat, merging lists twice the size */
+		insize *= 2;
+	}
+}
+
+static void add_base_subject(struct mail_thread_context *ctx,
+			     const char *subject, struct root_data *root)
+{
+	struct root_data *hash_root;
+	char *hash_subject;
+	void *key, *value;
+	int is_reply_or_forward;
+
+	if (subject == NULL)
+		return;
+
+	subject = imap_get_base_subject_cased(data_stack_pool, subject,
+					      &is_reply_or_forward);
+	if (*subject == '\0')
+		return;
+
+	if (!hash_lookup_full(ctx->subject_hash, subject, &key, &value)) {
+		hash_subject = p_strdup(ctx->str_pool, subject);
+		hash_root = root;
+		hash_insert(ctx->subject_hash, hash_subject, root);
+	} else {
+		hash_subject = key;
+		hash_root = value;
+
+		if (!NODE_IS_DUMMY(hash_root->node) &&
+		    (NODE_IS_DUMMY(root->node) ||
+		     (hash_root->reply && !is_reply_or_forward)))
+			hash_update(ctx->subject_hash, hash_subject, root);
+	}
+
+	root->base_subject = hash_subject;
+	root->reply = is_reply_or_forward;
+}
+
+static void gather_root_data(struct mail_thread_context *ctx)
+{
+	const struct mail_sort_callbacks *cb;
+	struct node *node;
+	size_t i, count;
+	unsigned int id;
+
+	/* get the number of root nodes */
+	for (count = 0, node = ctx->root_nodes; node != NULL; node = node->next)
+		count++;
+	ctx->root_count = count;
+
+	ctx->root_data = p_new(ctx->pool, struct root_data, ctx->root_count);
+	cb = ctx->callbacks;
+
+	ctx->subject_hash =
+		hash_create(default_pool, ctx->root_count * 2, str_hash,
+			    (HashCompareFunc)strcmp);
+
+	node = ctx->root_nodes;
+	for (i = 0; i < ctx->root_count; i++, node = node->next) {
+		ctx->root_data[i].node = node;
+
+		if (!NODE_IS_DUMMY(node))
+			id = node->id;
+		else {
+			/* sort children, use the first one's id */
+			node->first_child = sort_nodes(ctx, node->first_child);
+			id = node->first_child->id;
+		}
+
+		t_push();
+
+		ctx->root_data[i].sort_id = id;
+		ctx->root_data[i].sent_date =
+			cb->input_time(MAIL_SORT_DATE, id,
+				       ctx->callback_context);
+
+		add_base_subject(ctx, cb->input_str(MAIL_SORT_SUBJECT, id,
+						    ctx->callback_context),
+				 &ctx->root_data[i]);
+
+		cb->input_reset(ctx->callback_context);
+		t_pop();
+	}
+
+	i_assert(node == NULL);
+}
+
+static void merge_subject_threads(struct mail_thread_context *ctx)
+{
+        struct root_data *root, *hash_root;
+	size_t i;
+
+	for (i = 0; i < ctx->root_count; i++) {
+		root = &ctx->root_data[i];
+
+		/* (ii) If the thread subject is empty, skip this message. */
+		if (root->base_subject == NULL)
+			continue;
+
+		/* (iii) Lookup the message associated with this thread
+		   subject in the subject table. */
+		hash_root = hash_lookup(ctx->subject_hash, root->base_subject);
+		i_assert(hash_root != NULL);
+
+		/* (iv) If the message in the subject table is the current
+		   message, skip this message. */
+		if (hash_root == root)
+			continue;
+
+		/* Otherwise, merge the current message with the one in the
+		   subject table using the following rules: */
+
+		if (NODE_IS_DUMMY(root->node) &&
+		    NODE_IS_DUMMY(hash_root->node)) {
+			/* If both messages are dummies, append the current
+			   message's children to the children of the message in
+			   the subject table (the children of both messages
+			   become siblings), and then delete the current
+			   message. */
+			find_last_child(hash_root->node)->next =
+				root->node->first_child;
+			root->node = NULL;
+		} else if (NODE_IS_DUMMY(hash_root->node) ||
+			   (root->reply && !hash_root->reply)) {
+			/* If the message in the subject table is a dummy
+			   and the current message is not, make the current
+			   message a child of the message in the subject table
+			   (a sibling of its children).
+
+			   If the current message is a reply or forward and
+			   the message in the subject table is not, make the
+			   current message a child of the message in the
+			   subject table (a sibling of its children). */
+			root->node->parent = hash_root->node;
+			root->node->next = hash_root->node->first_child;
+			hash_root->node->first_child = root->node;
+			root->node = NULL;
+		} else {
+			/* Otherwise, create a new dummy message and make both
+			   the current message and the message in the subject
+			   table children of the dummy.  Then replace the
+			   message in the subject table with the dummy
+			   message. */
+			struct node *node;
+
+			node = p_new(ctx->pool, struct node, 1);
+			node->first_child = root->node;
+
+			root->node->next = hash_root->node;
+			root->node->parent = node;
+
+			hash_root->node->next = NULL;
+			hash_root->node->parent = node;
+
+			hash_root->node = node;
+			hash_root->reply = FALSE;
+			root->node = NULL;
+		}
+	}
+}
+
+static int send_nodes(struct mail_thread_context *ctx,
+		      string_t *str, struct node *node)
+{
+	/* sort the siblings first */
+	node = sort_nodes(ctx, node);
+
+	while (node != NULL) {
+		if (str_len(str) + MAX_INT_STRLEN + 3 >= OUTPUT_BUF_SIZE) {
+			/* string getting full, flush it */
+			if (!o_stream_send(ctx->output,
+					   str_data(str), str_len(str)))
+				return FALSE;
+			str_truncate(str, 0);
+		}
+
+		if (node->first_child == NULL)
+			str_printfa(str, "(%u)", node->id);
+		else {
+			str_printfa(str, "(%u ", node->id);
+			send_nodes(ctx, str, node->first_child);
+			str_append_c(str, ')');
+		}
+
+		node = node->next;
+	}
+	return TRUE;
+}
+
+static void send_roots(struct mail_thread_context *ctx)
+{
+	struct root_data *root;
+	string_t *str;
+	size_t i;
+
+	str = t_str_new(OUTPUT_BUF_SIZE);
+	str_append_c(str, ' ');
+
+	for (i = 0; i < ctx->root_count; i++) {
+		root = &ctx->root_data[i];
+
+		if (root->node == NULL)
+			continue;
+
+		if (str_len(str) + MAX_INT_STRLEN + 3 >= OUTPUT_BUF_SIZE) {
+			/* string getting full, flush it */
+			if (!o_stream_send(ctx->output,
+					   str_data(str), str_len(str)))
+				return;
+			str_truncate(str, 0);
+		}
+
+		str_append_c(str, '(');
+		if (!NODE_IS_DUMMY(root->node))
+			str_printfa(str, "%u", root->node->id);
+
+		if (root->node->first_child != NULL) {
+			if (!NODE_IS_DUMMY(root->node))
+				str_append_c(str, ' ');
+
+			if (!send_nodes(ctx, str, root->node->first_child))
+				return;
+		}
+
+		str_append_c(str, ')');
+	}
+
+	(void)o_stream_send(ctx->output, str_data(str), str_len(str));
+}
+
+static int root_data_cmp(const void *p1, const void *p2)
+{
+	const struct root_data *d1 = p1;
+	const struct root_data *d2 = p2;
+
+	if (d1->sent_date == d2->sent_date ||
+	    d1->sent_date == 0 || d2->sent_date == 0)
+		return d1->sort_id < d2->sort_id ? -1 :
+			d1->sort_id > d2->sort_id ? 1 : 0;
+	else
+		return d1->sent_date < d2->sent_date ? -1 : 1;
+}
+
+static void save_root_cb(void *key __attr_unused__, void *value, void *context)
+{
+	struct mail_thread_context *ctx = context;
+	struct node *node = value;
+
+	if (node->parent == NULL)
+		add_root(ctx, node);
+}
+
+void mail_thread_finish(struct mail_thread_context *ctx)
+{
+	if (ctx->root_nodes == NULL) {
+		/* no messages */
+		return;
+	}
+
+	/* (2) save root nodes and drop the msgid_hash */
+	hash_foreach(ctx->msgid_hash, save_root_cb, ctx);
+	hash_destroy(ctx->msgid_hash);
+	ctx->msgid_hash = NULL;
+
+	/* drop the memory allocated for message-IDs, reuse their memory
+	   for base subjects */
+	p_clear(ctx->str_pool);
+
+	/* (3) */
+	prune_dummy_messages(&ctx->root_nodes);
+
+	/* get the message dates and subjects here and save them to an array.
+	   this probably gets us better caching than fetching them separately.
+	   for dummy nodes sort their children as required by (4).
+	   subjects are stored into hash as required by (5.B). */
+	gather_root_data(ctx);
+
+	/* (5.C) Merge threads with the same thread subject. */
+	merge_subject_threads(ctx);
+
+	/* (4) sort roots. doing it before subject merging would break
+	   subject_hash, and I can't see how doing it later would change
+	   anything */
+	qsort(ctx->root_data, ctx->root_count,
+	      sizeof(struct root_data), root_data_cmp);
+
+	/* (6) Sort and send replies */
+	t_push();
+	send_roots(ctx);
+	t_pop();
+
+        mail_thread_deinit(ctx);
+}
diff -r b71883ef0247 -r 4f697dde0fca src/lib-storage/mail-thread.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/mail-thread.h	Wed Jan 08 22:49:51 2003 +0200
@@ -0,0 +1,22 @@
+#ifndef __MAIL_THREAD_H
+#define __MAIL_THREAD_H
+
+#include "mail-storage.h"
+#include "mail-sort.h"
+
+struct mail_thread_context;
+
+struct mail_thread_context *
+mail_thread_init(enum mail_thread_type type, struct ostream *output,
+		 const struct mail_sort_callbacks *callbacks,
+		 void *callback_context);
+
+/* id is either UID or sequence number of message, whichever is preferred
+   in mail_thread_callbacks parameters. */
+void mail_thread_input(struct mail_thread_context *ctx, unsigned int id,
+		       const char *message_id, const char *in_reply_to,
+		       const char *references);
+
+void mail_thread_finish(struct mail_thread_context *ctx);
+
+#endif