changeset 8146:70b53e9b232e HEAD

Rewrote thread indexing code. It's a lot simpler and takes less disk space. We no longer try to keep a hash table and the entire thread tree stored on disk. Instead we keep a simple Message-ID string (actually just "uid, ref#" pointer) -> unique index number mapping on disk, read it to memory and use it to build the thread tree. After the initial build the thread tree is still updated incrementally.
author Timo Sirainen <tss@iki.fi>
date Mon, 01 Sep 2008 15:17:00 +0300
parents b296beccb70e
children b7e097200892
files .hgignore src/imap/cmd-thread.c src/lib-index/Makefile.am src/lib-index/mail-hash.c src/lib-index/mail-hash.h src/lib-index/mail-index-private.h src/lib-index/mail-index-strmap.c src/lib-index/mail-index-strmap.h src/lib-index/mail-index.c src/lib-storage/index/Makefile.am src/lib-storage/index/index-search.c src/lib-storage/index/index-thread-finish.c src/lib-storage/index/index-thread-links.c src/lib-storage/index/index-thread-private.h src/lib-storage/index/index-thread.c src/lib-storage/mail-thread.h src/util/Makefile.am src/util/threadview.c
diffstat 18 files changed, 2217 insertions(+), 2419 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Mon Sep 01 15:11:54 2008 +0300
+++ b/.hgignore	Mon Sep 01 15:17:00 2008 +0300
@@ -78,5 +78,6 @@
 src/util/logview
 src/util/maildirlock
 src/util/rawlog
+src/util/threadview
 src/plugins/quota/rquota_xdr.c
 src/plugins/quota/rquota.h
--- a/src/imap/cmd-thread.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/imap/cmd-thread.c	Mon Sep 01 15:17:00 2008 +0300
@@ -80,27 +80,18 @@
 {
 	struct mail_thread_context *ctx;
 	string_t *str;
-	bool reset = FALSE;
 	int ret;
 
 	i_assert(thread_type == MAIL_THREAD_REFERENCES ||
 		 thread_type == MAIL_THREAD_REFERENCES2);
 
 	str = str_new(default_pool, 1024);
-	for (;;) {
-		ret = mail_thread_init(cmd->client->mailbox, reset,
-				       search_args, &ctx);
-		if (ret == 0) {
-			ret = imap_thread_write_reply(ctx, str, thread_type,
-						      !cmd->uid);
-			mail_thread_deinit(&ctx);
-		}
-
-		if (ret == 0 || reset)
-			break;
-		/* try again with in-memory hash */
-		reset = TRUE;
-		str_truncate(str, 0);
+	ret = mail_thread_init(cmd->client->mailbox,
+			       search_args, &ctx);
+	if (ret == 0) {
+		ret = imap_thread_write_reply(ctx, str, thread_type,
+					      !cmd->uid);
+		mail_thread_deinit(&ctx);
 	}
 
 	if (ret == 0) {
--- a/src/lib-index/Makefile.am	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-index/Makefile.am	Mon Sep 01 15:17:00 2008 +0300
@@ -12,7 +12,6 @@
 	mail-cache-lookup.c \
 	mail-cache-transaction.c \
 	mail-cache-sync-update.c \
-        mail-hash.c \
         mail-index.c \
         mail-index-dummy-view.c \
         mail-index-fsck.c \
@@ -21,6 +20,7 @@
         mail-index-modseq.c \
         mail-index-transaction.c \
         mail-index-transaction-view.c \
+        mail-index-strmap.c \
         mail-index-sync.c \
         mail-index-sync-ext.c \
         mail-index-sync-keywords.c \
@@ -38,10 +38,10 @@
 headers = \
 	mail-cache.h \
 	mail-cache-private.h \
-        mail-hash.h \
 	mail-index.h \
         mail-index-modseq.h \
 	mail-index-private.h \
+        mail-index-strmap.h \
 	mail-index-sync-private.h \
 	mail-index-transaction-private.h \
 	mail-index-view-private.h \
--- a/src/lib-index/mail-hash.c	Mon Sep 01 15:11:54 2008 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1136 +0,0 @@
-/* Copyright (c) 2006-2008 Dovecot authors, see the included COPYING file */
-
-#include "lib.h"
-#include "ioloop.h"
-#include "array.h"
-#include "primes.h"
-#include "nfs-workarounds.h"
-#include "file-dotlock.h"
-#include "file-set-size.h"
-#include "read-full.h"
-#include "write-full.h"
-#include "mmap-util.h"
-#include "nfs-workarounds.h"
-#include "mail-index-private.h"
-#include "mail-hash.h"
-
-#include <stdio.h>
-#include <stddef.h>
-#include <utime.h>
-#include <sys/stat.h>
-
-/* How large to create the file initially */
-#define FILE_SIZE_INIT_PERCENTAGE 120
-/* How much larger to grow the file when it needs to be done */
-#define MAIL_HASH_GROW_PERCENTAGE 20
-/* Minimum hash size to use */
-#define MAIL_HASH_MIN_SIZE 109
-
-#define MAIL_HASH_SHRINK_PRESSURE 0.3
-#define MAIL_HASH_GROW_PRESSURE 2
-
-#define MAIL_HASH_TIMEOUT_SECS 60
-
-struct mail_hash {
-	struct mail_index *index;
-
-	hash_callback_t *key_hash_cb;
-	mail_hash_ctx_cmp_callback_t *key_compare_cb;
-	mail_hash_remap_callback_t *remap_callback;
-	hash_callback_t *rec_hash_cb;
-	void *cb_context;
-	unsigned int transaction_count;
-
-	char *filepath;
-	char *suffix;
-	int fd;
-	unsigned int record_size;
-
-	dev_t dev;
-	ino_t ino;
-
-	void *mmap_base;
-	size_t mmap_size;
-
-	time_t mtime, mapped_mtime;
-	size_t change_offset_start, change_offset_end;
-
-	int lock_type;
-	struct file_lock *file_lock;
-	struct dotlock *dotlock;
-	struct dotlock_settings dotlock_settings;
-
-	const struct mail_hash_header *hdr;
-
-	unsigned int in_memory:1;
-	unsigned int recreate:1;
-	unsigned int recreated:1;
-};
-
-#define HASH_RECORD_IDX(trans, idx) \
-	PTR_OFFSET((trans)->records_base, (idx) * (trans)->hdr.record_size)
-
-struct mail_hash_transaction {
-	struct mail_hash *hash;
-
-	struct mail_hash_header hdr;
-	/* hash size is [hdr.hash_size] */
-	uint32_t *hash_base;
-	/* record [0] is always unused */
-	void *records_base;
-	/* number of records in records_base.
-	   base_count + inserts.count == hdr.record_count */
-	unsigned int base_count;
-
-	/* bit array of modified data. each bit represents 1024 bytes of the
-	   hash file. used only for data read into memory from hash (not
-	   for mmaped data) */
-	ARRAY_TYPE(uint32_t) updates;
-	/* Records inserted within this transaction */
-	ARRAY_TYPE(mail_hash_record) inserts;
-	unsigned int next_grow_hashed_count;
-
-	uint32_t *hash_buf;
-	uint32_t records_base_1; /* used as records_base if base_count=1 */
-
-	unsigned int failed:1;
-	unsigned int mapped:1;
-};
-
-struct mail_hash_iterate_context {
-	struct mail_hash_transaction *trans;
-	uint32_t next_idx;
-	unsigned int iter_count;
-};
-
-const struct dotlock_settings default_dotlock_settings = {
-	MEMBER(temp_prefix) NULL,
-	MEMBER(lock_suffix) NULL,
-
-	MEMBER(timeout) 10,
-	MEMBER(stale_timeout) 30
-};
-
-static void mail_hash_set_syscall_error(struct mail_hash *hash,
-					const char *function)
-{
-	if (ENOSPACE(errno)) {
-		hash->index->nodiskspace = TRUE;
-		return;
-	}
-
-	mail_index_set_error(hash->index,
-			     "%s failed with index hash file %s: %m",
-			     function, hash->filepath);
-}
-
-void mail_hash_set_corrupted(struct mail_hash *hash, const char *error)
-{
-	mail_index_set_error(hash->index, "Corrupted index hash file %s: %s",
-			     hash->filepath, error);
-	if (unlink(hash->filepath) < 0 && errno != ENOENT)
-		mail_hash_set_syscall_error(hash, "unlink()");
-}
-
-static inline struct mail_hash_record *
-mail_hash_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	if (idx < trans->base_count)
-		return HASH_RECORD_IDX(trans, idx);
-
-	i_assert(idx < trans->hdr.record_count);
-	return array_idx_modifiable(&trans->inserts, idx - trans->base_count);
-}
-
-static void mail_hash_file_close(struct mail_hash *hash)
-{
-	i_assert(hash->transaction_count == 0);
-
-	if (hash->file_lock != NULL)
-		file_lock_free(&hash->file_lock);
-
-	if (hash->mmap_base != NULL) {
-		if (munmap(hash->mmap_base, hash->mmap_size) < 0)
-			mail_hash_set_syscall_error(hash, "munmap()");
-		hash->mmap_base = NULL;
-		hash->mmap_size = 0;
-	}
-	hash->ino = 0;
-	hash->mapped_mtime = 0;
-
-	if (hash->fd != -1) {
-		if (close(hash->fd) < 0)
-			mail_hash_set_syscall_error(hash, "close()");
-		hash->fd = -1;
-	}
-
-	hash->hdr = NULL;
-	hash->recreate = FALSE;
-	hash->recreated = FALSE;
-}
-
-struct mail_hash *
-mail_hash_alloc(struct mail_index *index, const char *suffix,
-		unsigned int record_size,
-		hash_callback_t *key_hash_cb,
-		hash_callback_t *rec_hash_cb,
-		mail_hash_ctx_cmp_callback_t *key_compare_cb,
-		mail_hash_remap_callback_t *remap_callback,
-		void *context)
-{
-	struct mail_hash *hash;
-
-	i_assert(record_size >= sizeof(struct mail_hash_record));
-
-	hash = i_new(struct mail_hash, 1);
-	hash->index = index;
-	hash->in_memory = MAIL_INDEX_IS_IN_MEMORY(index) || suffix == NULL;
-	hash->filepath = hash->in_memory ? i_strdup("(in-memory hash)") :
-		i_strconcat(index->filepath, suffix, NULL);
-	i_assert(hash->filepath != NULL);
-
-	hash->suffix = i_strdup(suffix);
-	hash->record_size = record_size;
-	hash->fd = -1;
-	hash->lock_type = F_UNLCK;
-	hash->dotlock_settings = default_dotlock_settings;
-	hash->dotlock_settings.use_excl_lock = index->use_excl_dotlocks;
-	hash->dotlock_settings.nfs_flush = index->nfs_flush;
-
-	hash->key_hash_cb = key_hash_cb;
-	hash->rec_hash_cb = rec_hash_cb;
-	hash->key_compare_cb = key_compare_cb;
-	hash->remap_callback = remap_callback,
-	hash->cb_context = context;
-	return hash;
-}
-
-void mail_hash_free(struct mail_hash **_hash)
-{
-	struct mail_hash *hash = *_hash;
-
-	*_hash = NULL;
-
-	mail_hash_file_close(hash);
-	i_free(hash->filepath);
-	i_free(hash->suffix);
-	i_free(hash);
-}
-
-static void
-mail_hash_header_init(struct mail_hash *hash, unsigned int hash_size,
-		      struct mail_hash_header *hdr)
-{
-	memset(hdr, 0, sizeof(*hdr));
-	hdr->version = MAIL_HASH_VERSION;
-	hdr->base_header_size = sizeof(*hdr);
-	hdr->header_size = hdr->base_header_size;
-	hdr->record_size = hash->record_size;
-	/* note that since the index may not have been synced yet, the
-	   uid_validity may be 0 */
-	hdr->uid_validity = hash->index->map->hdr.uid_validity;
-
-	hdr->hash_size = I_MAX(primes_closest(hash_size), MAIL_HASH_MIN_SIZE);
-	hdr->record_count = 1; /* [0] always exists */
-	hdr->reset_counter = hash->hdr == NULL ? ioloop_time :
-		hash->hdr->reset_counter + 1;
-}
-
-static bool mail_hash_check_header(struct mail_hash *hash,
-				   const struct mail_hash_header *hdr)
-{
-	uoff_t file_size;
-
-	if (hdr->version != MAIL_HASH_VERSION ||
-	    (hdr->last_uid != 0 &&
-	     hdr->uid_validity != hash->index->map->hdr.uid_validity) ||
-	    (hdr->corrupted && hash->change_offset_end == 0)) {
-		/* silent rebuild */
-		if (unlink(hash->filepath) < 0 && errno != ENOENT)
-			mail_hash_set_syscall_error(hash, "unlink()");
-		return FALSE;
-	}
-
-	if (hdr->record_size != hash->record_size) {
-		mail_hash_set_corrupted(hash, "record_size mismatch");
-		return FALSE;
-	}
-	if (hdr->base_header_size != sizeof(*hdr)) {
-		mail_hash_set_corrupted(hash, "base_header_size mismatch");
-		return FALSE;
-	}
-	if (hdr->header_size < hdr->base_header_size) {
-		mail_hash_set_corrupted(hash, "Invalid header_size");
-		return FALSE;
-	}
-	if (hdr->record_count == 0) {
-		mail_hash_set_corrupted(hash, "Invalid record_count");
-		return FALSE;
-	}
-	if (hdr->hashed_count > hdr->record_count) {
-		mail_hash_set_corrupted(hash, "Invalid hashed_count");
-		return FALSE;
-	}
-	if (hdr->message_count > hdr->record_count - 1) {
-		mail_hash_set_corrupted(hash, "Invalid message_count");
-		return FALSE;
-	}
-	if (hdr->last_uid < hdr->message_count) {
-		mail_hash_set_corrupted(hash, "Invalid last_uid");
-		return FALSE;
-	}
-	if (hdr->uid_validity == 0 && hdr->message_count > 0) {
-		mail_hash_set_corrupted(hash, "Zero uidvalidity");
-		return FALSE;
-	}
-
-	if (hdr->hash_size < primes_closest(1)) {
-		mail_hash_set_corrupted(hash, "Invalid hash_size");
-		return FALSE;
-	}
-
-	file_size = hdr->header_size +
-		hdr->hash_size * sizeof(uint32_t) +
-		hdr->record_count * hdr->record_size;
-	if (hash->mmap_size < file_size) {
-		mail_hash_set_corrupted(hash, "File too small");
-		return FALSE;
-	}
-	return TRUE;
-}
-
-static int mail_hash_file_fstat(struct mail_hash *hash, struct stat *st_r)
-{
-	if (fstat(hash->fd, st_r) < 0) {
-		mail_hash_set_syscall_error(hash, "fstat()");
-		return -1;
-	}
-	hash->dev = st_r->st_dev;
-	hash->ino = st_r->st_ino;
-	return 0;
-}
-
-static int mail_hash_file_map(struct mail_hash *hash, bool full)
-{
-	struct stat st;
-
-	i_assert(hash->transaction_count == 0);
-	i_assert(hash->lock_type != F_UNLCK);
-
-	if (mail_hash_file_fstat(hash, &st) < 0)
-		return -1;
-
-	if (st.st_size < (off_t)sizeof(*hash->hdr)) {
-		mail_hash_set_corrupted(hash, "File too small");
-		return 0;
-	}
-
-	if (!hash->index->mmap_disable) {
-		if (hash->mmap_base != NULL) {
-			if (munmap(hash->mmap_base, hash->mmap_size) < 0)
-				mail_hash_set_syscall_error(hash, "munmap()");
-		}
-		hash->mmap_size = st.st_size;
-		hash->mmap_base = mmap(NULL, hash->mmap_size,
-				       PROT_READ | PROT_WRITE,
-				       MAP_PRIVATE, hash->fd, 0);
-		if (hash->mmap_base == MAP_FAILED) {
-			hash->mmap_size = 0;
-			hash->mmap_base = NULL;
-			mail_hash_set_syscall_error(hash, "mmap()");
-			return -1;
-		}
-		hash->mapped_mtime = st.st_mtime;
-	} else {
-		//FIXME
-	}
-	hash->mtime = st.st_mtime;
-	hash->hdr = hash->mmap_base;
-	return 1;
-}
-
-static int mail_hash_file_map_header(struct mail_hash *hash)
-{
-	int ret;
-
-	if (hash->fd == -1) {
-		hash->hdr = NULL;
-		return 1;
-	}
-
-	if ((ret = mail_hash_file_map(hash, FALSE)) <= 0)
-		return ret;
-
-	return mail_hash_check_header(hash, hash->hdr) ? 1 : 0;
-}
-
-static int mail_hash_open_fd(struct mail_hash *hash)
-{
-	hash->fd = nfs_safe_open(hash->filepath, O_RDWR);
-	if (hash->fd == -1) {
-		if (errno == ENOENT)
-			return 0;
-		mail_hash_set_syscall_error(hash, "open()");
-		return -1;
-	}
-	return 1;
-}
-
-static int mail_hash_reopen_if_needed(struct mail_hash *hash)
-{
-	struct stat st;
-
-	if (hash->fd == -1)
-		return mail_hash_open_fd(hash);
-
-	if (hash->index->nfs_flush)
-		nfs_flush_file_handle_cache(hash->filepath);
-
-	if (hash->ino == 0) {
-		if (mail_hash_file_fstat(hash, &st) < 0)
-			return -1;
-	}
-
-	if (nfs_safe_stat(hash->filepath, &st) < 0) {
-		if (errno != ENOENT) {
-			mail_hash_set_syscall_error(hash, "stat()");
-			return -1;
-		}
-	} else if (st.st_ino == hash->ino && CMP_DEV_T(st.st_dev, hash->dev)) {
-		/* the file looks the same */
-		if (!hash->index->nfs_flush || fstat(hash->fd, &st) == 0) {
-			/* it is the same */
-			return 0;
-		}
-		if (errno != ESTALE) {
-			mail_hash_set_syscall_error(hash, "fstat()");
-			return -1;
-		}
-		/* ESTALE - another file renamed over */
-	}
-	mail_hash_file_close(hash);
-	return mail_hash_open_fd(hash);
-}
-
-static int
-mail_hash_file_lock(struct mail_hash *hash, int lock_type, bool try_lock)
-{
-	enum dotlock_create_flags flags;
-	unsigned int timeout;
-	int ret;
-
-	i_assert(hash->fd != -1);
-
-	if (hash->index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
-		i_assert(hash->file_lock == NULL);
-
-		timeout = !try_lock ? MAIL_HASH_TIMEOUT_SECS : 0;
-		ret = file_wait_lock(hash->fd, hash->filepath, lock_type,
-				     hash->index->lock_method,
-				     timeout, &hash->file_lock);
-		if (ret < 0 || (ret == 0 && !try_lock)) {
-			mail_hash_set_syscall_error(hash,
-						    "file_wait_lock()");
-		}
-	} else {
-		i_assert(hash->dotlock == NULL);
-
-		flags = try_lock ? DOTLOCK_CREATE_FLAG_NONBLOCK : 0;
-		ret = file_dotlock_create(&hash->dotlock_settings,
-					  hash->filepath, flags,
-					  &hash->dotlock);
-		if (ret < 0 || (ret == 0 && !try_lock)) {
-			mail_hash_set_syscall_error(hash,
-						    "file_dotlock_create()");
-		}
-	}
-	return ret;
-}
-
-int mail_hash_lock_shared(struct mail_hash *hash)
-{
-	int ret;
-
-	i_assert(hash->lock_type == F_UNLCK);
-
-	if (hash->in_memory) {
-		if (hash->hdr == NULL)
-			return 0;
-		hash->lock_type = F_RDLCK;
-		return 1;
-	}
-
-	if (hash->fd == -1) {
-		if ((ret = mail_hash_open_fd(hash)) <= 0)
-			return ret;
-	}
-
-	do {
-		if ((ret = mail_hash_file_lock(hash, F_RDLCK, FALSE)) <= 0)
-			return -1;
-	} while ((ret = mail_hash_reopen_if_needed(hash)) > 0);
-	if (ret < 0 || hash->fd == -1)
-		return ret;
-
-	hash->lock_type = F_RDLCK;
-	mail_index_flush_read_cache(hash->index, hash->filepath,
-				    hash->fd, TRUE);
-	if ((ret = mail_hash_file_map_header(hash)) <= 0) {
-		mail_hash_unlock(hash);
-		return ret;
-	}
-	return 1;
-}
-
-static int
-mail_hash_lock_exclusive_fd(struct mail_hash *hash,
-			    enum mail_hash_lock_flags flags, bool *created_r)
-{
-	bool exists = TRUE;
-	int ret;
-
-	i_assert(hash->file_lock == NULL);
-	i_assert(hash->dotlock == NULL);
-
-	*created_r = FALSE;
-
-	if (hash->index->lock_method == FILE_LOCK_METHOD_DOTLOCK) {
-		/* use dotlocking */
-	} else if (hash->fd == -1 && (ret = mail_hash_open_fd(hash)) <= 0) {
-		if (ret < 0 ||
-		    (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) == 0)
-			return ret;
-		/* the file doesn't exist - we need to use dotlocking */
-		exists = FALSE;
-	} else {
-		/* first try to lock the file descriptor */
-		ret = mail_hash_file_lock(hash, F_WRLCK, TRUE);
-		if (ret != 0) {
-			/* success / error */
-			return ret;
-		}
-		if ((flags & MAIL_HASH_LOCK_FLAG_TRY) != 0)
-			return 0;
-
-		/* already locked. if it's only read-locked, we can
-		   overwrite the file. first wait for a shared lock. */
-		if (mail_hash_lock_shared(hash) <= 0)
-			return -1;
-		/* try once again if we can upgrade our shared lock to
-		   an exclusive lock */
-		ret = file_lock_try_update(hash->file_lock, F_WRLCK);
-		if (ret != 0)
-			return ret;
-		/* still no luck - fallback to dotlocking */
-	}
-	if (file_dotlock_create(&hash->dotlock_settings, hash->filepath,
-				0, &hash->dotlock) <= 0) {
-		mail_hash_set_syscall_error(hash, "file_dotlock_create()");
-		return -1;
-	}
-	if (!exists) {
-		/* file didn't exist - see if someone just created it */
-		i_assert(hash->fd == -1);
-		ret = mail_hash_open_fd(hash);
-		if (ret != 0) {
-			(void)file_dotlock_delete(&hash->dotlock);
-			if (ret < 0)
-				return -1;
-
-			/* the file was created - we need to have it locked,
-			   so retry this operation */
-			return mail_hash_lock_exclusive_fd(hash, flags,
-							   created_r);
-		}
-		*created_r = TRUE;
-	}
-	/* other sessions are reading the file, we must not overwrite */
-	hash->recreate = TRUE;
-	return 1;
-}
-
-static int mail_hash_lock_excl_full(struct mail_hash *hash,
-				    enum mail_hash_lock_flags flags,
-				    bool *created_r)
-{
-	bool create_missing = (flags & MAIL_HASH_LOCK_FLAG_CREATE_MISSING) != 0;
-	int ret;
-
-	i_assert(hash->lock_type == F_UNLCK);
-
-	if (hash->in_memory) {
-		if (hash->hdr == NULL && !create_missing)
-			return 0;
-		hash->lock_type = F_WRLCK;
-		return 1;
-	}
-
-	if ((ret = mail_hash_lock_exclusive_fd(hash, flags, created_r)) <= 0) {
-		mail_hash_unlock(hash);
-		return ret;
-	}
-	hash->lock_type = F_WRLCK;
-
-	mail_index_flush_read_cache(hash->index, hash->filepath,
-				    hash->fd, TRUE);
-	if ((ret = mail_hash_file_map_header(hash)) <= 0) {
-		mail_hash_unlock(hash);
-		if (ret == 0 && create_missing) {
-			/* the broken file was unlinked - try again */
-			mail_hash_file_close(hash);
-			return mail_hash_lock_excl_full(hash, flags, created_r);
-		}
-		return ret;
-	}
-	return 1;
-}
-
-int mail_hash_lock_exclusive(struct mail_hash *hash,
-			     enum mail_hash_lock_flags flags)
-{
-	bool created;
-
-	return mail_hash_lock_excl_full(hash, flags, &created);
-}
-
-void mail_hash_unlock(struct mail_hash *hash)
-{
-	if (hash->recreated)
-		mail_hash_file_close(hash);
-
-	if (hash->file_lock != NULL)
-		file_unlock(&hash->file_lock);
-	if (hash->dotlock != NULL)
-		(void)file_dotlock_delete(&hash->dotlock);
-	hash->lock_type = F_UNLCK;
-}
-
-int mail_hash_get_lock_type(struct mail_hash *hash)
-{
-	return hash->lock_type;
-}
-
-int mail_hash_create_excl_locked(struct mail_hash *hash)
-{
-	bool created;
-
-	if (mail_hash_lock_excl_full(hash, MAIL_HASH_LOCK_FLAG_CREATE_MISSING,
-				     &created) <= 0)
-		return -1;
-
-	if (!created) {
-		mail_hash_unlock(hash);
-		return 0;
-	}
-	return 1;
-}
-
-static void mail_hash_resize(struct mail_hash_transaction *trans)
-{
-	struct mail_hash_record *rec;
-	unsigned int idx, new_size, hash_idx, hash_key;
-
-	new_size = I_MAX(primes_closest(trans->hdr.hashed_count),
-			 MAIL_HASH_MIN_SIZE);
-	i_assert(new_size != trans->hdr.hash_size);
-	trans->hdr.hash_size = new_size;
-
-	i_free(trans->hash_buf);
-	trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-	trans->hash_base = trans->hash_buf;
-
-	for (idx = 1; idx < trans->hdr.record_count; idx++) {
-		rec = mail_hash_idx(trans, idx);
-		if (MAIL_HASH_RECORD_IS_DELETED(rec))
-			continue;
-
-		hash_key = trans->hash->rec_hash_cb(rec);
-		if (hash_key == 0)
-			continue;
-
-		/* found a hashed record, move it to its new position */
-		hash_idx = hash_key % trans->hdr.hash_size;
-		rec->next_idx = trans->hash_buf[hash_idx];
-		trans->hash_buf[hash_idx] = idx;
-	}
-
-	trans->next_grow_hashed_count =
-		trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE;
-	i_assert(trans->hdr.hashed_count < trans->next_grow_hashed_count);
-}
-
-struct mail_hash_transaction *
-mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size)
-{
-	struct mail_hash_transaction *trans;
-
-	i_assert(hash->lock_type != F_UNLCK);
-
-	trans = i_new(struct mail_hash_transaction, 1);
-	trans->hash = hash;
-	if (hash->hdr != NULL)
-		trans->hdr = *hash->hdr;
-	else {
-		mail_hash_header_init(hash, min_hash_size, &trans->hdr);
-		trans->mapped = TRUE;
-	}
-	trans->base_count = trans->hdr.record_count;
-	if (trans->base_count <= 1) {
-		/* empty hash */
-		trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-		trans->hash_base = trans->hash_buf;
-		trans->records_base = &trans->records_base_1;
-	} else {
-		trans->hash_base =
-			PTR_OFFSET(hash->mmap_base, hash->hdr->header_size);
-		trans->records_base = &trans->hash_base[hash->hdr->hash_size];
-	}
-
-	trans->next_grow_hashed_count =
-		trans->hdr.hash_size * MAIL_HASH_GROW_PRESSURE;
-	hash->transaction_count++;
-	return trans;
-}
-
-int mail_hash_transaction_write(struct mail_hash_transaction *trans)
-{
-	struct mail_hash *hash = trans->hash;
-	const struct mail_hash_record *inserts;
-	unsigned int size, count, existing_size;
-	const char *temp_path = NULL;
-	uoff_t offset;
-	float nodes_per_list;
-	int fd = hash->fd;
-
-	i_assert(hash->lock_type == F_WRLCK);
-	i_assert(trans->hdr.hashed_count <= trans->hdr.record_count);
-
-	if (trans->failed)
-		return -1;
-	if (!array_is_created(&trans->inserts) &&
-	    !array_is_created(&trans->updates)) {
-		/* nothing changed */
-		return 0;
-	}
-
-	/* see if hash needs resizing */
-	nodes_per_list = (float)trans->hdr.hashed_count /
-		(float)trans->hdr.hash_size;
-	if ((nodes_per_list < MAIL_HASH_SHRINK_PRESSURE &&
-	     trans->hdr.hash_size > MAIL_HASH_MIN_SIZE) ||
-	    nodes_per_list > MAIL_HASH_GROW_PRESSURE)
-		mail_hash_resize(trans);
-
-	if (trans->hash->in_memory)
-		return 0;
-
-	if (hash->recreate || hash->hdr->hash_size != trans->hdr.hash_size) {
-		/* recreate the file instead of overwriting */
-		fd = -1;
-	}
-
-	existing_size = sizeof(trans->hdr) +
-		trans->hdr.hash_size * sizeof(uint32_t) +
-		trans->base_count * trans->hdr.record_size;
-	if (fd == -1) {
-		temp_path = t_strconcat(hash->filepath, ".tmp", NULL);
-		fd = open(temp_path, O_RDWR | O_CREAT | O_TRUNC,
-			  hash->index->mode);
-		if (fd == -1) {
-			if (ENOSPACE(errno)) {
-				hash->index->nodiskspace = TRUE;
-				return -1;
-			}
-			mail_index_set_error(hash->index,
-				"creat(%s) failed: %m", temp_path);
-			return -1;
-		}
-	}
-
-	if (pwrite_full(fd, &trans->hdr, sizeof(trans->hdr), 0) < 0) {
-		mail_hash_set_syscall_error(hash, "pwrite()");
-		return -1;
-	}
-	/* FIXME: use updates array */
-	offset = sizeof(trans->hdr);
-	if (pwrite_full(fd, trans->hash_base,
-			trans->hdr.hash_size * sizeof(uint32_t), offset) < 0) {
-		mail_hash_set_syscall_error(hash, "pwrite()");
-		return -1;
-	}
-	offset += trans->hdr.hash_size * sizeof(uint32_t);
-	/* if there's only the first null record, don't bother writing it.
-	   especially because then records_base may point to sizeof(uint32_t)
-	   instead of hdr.record_size */
-	if (trans->base_count > 1) {
-		if (pwrite_full(fd, trans->records_base,
-				trans->base_count * trans->hdr.record_size,
-				offset) < 0) {
-			mail_hash_set_syscall_error(hash, "pwrite()");
-			return -1;
-		}
-	}
-
-	/* write the new data */
-	if (array_is_created(&trans->inserts)) {
-		inserts = array_get(&trans->inserts, &count);
-		size = count * trans->hdr.record_size;
-		if (pwrite_full(fd, inserts, size, existing_size) < 0) {
-			mail_hash_set_syscall_error(hash, "pwrite()");
-			return -1;
-		}
-	}
-	if (temp_path != NULL) {
-		if (rename(temp_path, hash->filepath) < 0) {
-			mail_index_set_error(hash->index,
-				"rename(%s, %s) failed: %m",
-				temp_path, hash->filepath);
-			return -1;
-		}
-		if (close(fd) < 0)
-			mail_hash_set_syscall_error(hash, "close()");
-		/* we must not overwrite before reopening the file */
-		hash->recreate = TRUE;
-		hash->recreated = TRUE;
-	}
-	return 0;
-}
-
-void mail_hash_transaction_end(struct mail_hash_transaction **_trans)
-{
-	struct mail_hash_transaction *trans = *_trans;
-
-	*_trans = NULL;
-
-	trans->hash->transaction_count--;
-	if (array_is_created(&trans->inserts))
-		array_free(&trans->inserts);
-	if (array_is_created(&trans->updates))
-		array_free(&trans->updates);
-	i_free(trans->hash_buf);
-	i_free(trans);
-}
-
-bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans)
-{
-	return trans->failed;
-}
-
-bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans)
-{
-	return trans->hash->in_memory;
-}
-
-struct mail_hash *
-mail_hash_transaction_get_hash(struct mail_hash_transaction *trans)
-{
-	return trans->hash;
-}
-
-void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
-					 const char *error)
-{
-	trans->failed = TRUE;
-	mail_hash_set_corrupted(trans->hash, error);
-}
-
-void mail_hash_reset(struct mail_hash_transaction *trans)
-{
-	mail_hash_header_init(trans->hash, trans->hdr.hash_size / 2,
-			      &trans->hdr);
-	trans->mapped = TRUE;
-	trans->base_count = trans->hdr.record_count;
-	i_assert(trans->base_count == 1);
-
-	i_free(trans->hash_buf);
-	trans->hash_buf = i_new(uint32_t, trans->hdr.hash_size);
-	trans->hash_base = trans->hash_buf;
-	trans->records_base = &trans->records_base_1;
-}
-
-int mail_hash_map_file(struct mail_hash_transaction *trans)
-{
-	if (!trans->mapped) {
-		if (mail_hash_file_map(trans->hash, TRUE) <= 0) {
-			trans->failed = TRUE;
-			return -1;
-		}
-		trans->mapped = TRUE;
-	}
-	return 0;
-}
-
-struct mail_hash_header *
-mail_hash_get_header(struct mail_hash_transaction *trans)
-{
-	return &trans->hdr;
-}
-
-static void mail_hash_iterate_init(struct mail_hash_iterate_context *iter,
-				   struct mail_hash_transaction *trans,
-				   uint32_t start_idx)
-{
-	memset(iter, 0, sizeof(*iter));
-	iter->trans = trans;
-	iter->next_idx = start_idx;
-}
-
-static int mail_hash_iterate_next(struct mail_hash_iterate_context *iter,
-				  uint32_t *idx_r)
-{
-	struct mail_hash_record *rec;
-	uint32_t idx = iter->next_idx;
-
-	if (idx == 0)
-		return 0;
-	if (unlikely(idx >= iter->trans->hdr.record_count)) {
-		mail_hash_transaction_set_corrupted(iter->trans,
-			"Index points outside file");
-		return -1;
-	}
-	rec = mail_hash_idx(iter->trans, idx);
-	iter->next_idx = rec->next_idx;
-
-	if (++iter->iter_count > iter->trans->hdr.record_count) {
-		/* we've iterated through more indexes than there exist.
-		   we must be looping. */
-		mail_hash_transaction_set_corrupted(iter->trans,
-			"next_iter loops");
-		return -1;
-	}
-	*idx_r = idx;
-	return 1;
-}
-
-void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key,
-		       uint32_t *idx_r)
-{
-	struct mail_hash *hash = trans->hash;
-	struct mail_hash_iterate_context iter;
-	unsigned int hash_idx;
-	uint32_t idx;
-	int ret;
-
-	hash_idx = hash->key_hash_cb(key) % trans->hdr.hash_size;
-	mail_hash_iterate_init(&iter, trans, trans->hash_base[hash_idx]);
-
-	while ((ret = mail_hash_iterate_next(&iter, &idx)) > 0) {
-		if (hash->key_compare_cb(trans, key, idx, hash->cb_context))
-			break;
-	}
-
-	if (ret <= 0) {
-		*idx_r = 0;
-		return NULL;
-	} else {
-		*idx_r = idx;
-		return mail_hash_idx(trans, idx);
-	}
-}
-
-void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	i_assert(idx > 0);
-
-	if (idx >= trans->hdr.record_count) {
-		mail_hash_transaction_set_corrupted(trans,
-			"Index points outside file");
-		/* return pointer to the first dummy record */
-		idx = 0;
-	}
-
-	return mail_hash_idx(trans, idx);
-}
-
-static uoff_t
-mail_hash_idx_to_offset(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	return trans->hdr.header_size +
-		trans->hdr.hash_size * sizeof(uint32_t) +
-		trans->hdr.record_size * idx;
-}
-
-static void
-mail_hash_update_offset(struct mail_hash_transaction *trans,
-			uoff_t offset, unsigned int size)
-{
-	uint32_t *p;
-	unsigned int pos = offset / 1024;
-	unsigned int pos2 = (offset + size - 1) / 1024;
-
-	if (!array_is_created(&trans->updates))
-		i_array_init(&trans->updates, I_MAX(pos, 256));
-
-	while (pos <= pos2) {
-		p = array_idx_modifiable(&trans->updates, pos / 32);
-		*p |= 1 << (pos % 32);
-		pos++;
-	}
-}
-
-static void mail_hash_update_hash_idx(struct mail_hash_transaction *trans,
-				      uint32_t hash_idx)
-{
-	size_t offset;
-
-	offset = trans->hdr.header_size + hash_idx * sizeof(uint32_t);
-	mail_hash_update_offset(trans, offset, sizeof(uint32_t));
-}
-
-static void mail_hash_insert_idx(struct mail_hash_transaction *trans,
-				 const void *value, uint32_t *idx_r)
-{
-	struct mail_hash_record *rec;
-	uint32_t idx;
-
-	if (trans->hdr.first_hole_idx != 0) {
-		/* allocate the record from the first hole */
-		idx = trans->hdr.first_hole_idx;
-		if (idx >= trans->hdr.record_count) {
-			mail_hash_transaction_set_corrupted(trans,
-				"Corrupted first_hole_idx");
-			idx = 0;
-		}
-		rec = mail_hash_idx(trans, idx);
-
-		memcpy(&trans->hdr.first_hole_idx, rec + 1,
-		       sizeof(trans->hdr.first_hole_idx));
-		mail_hash_update(trans, idx);
-	} else {
-		idx = trans->hdr.record_count++;
-		if (!array_is_created(&trans->inserts)) {
-			array_create(&trans->inserts, default_pool,
-				     trans->hdr.record_size, 128);
-		}
-		rec = array_append_space(&trans->inserts);
-	}
-
-	memcpy(rec, value, trans->hdr.record_size);
-	rec->next_idx = 0;
-
-	*idx_r = idx;
-}
-
-static void mail_hash_insert_hash(struct mail_hash_transaction *trans,
-				  uint32_t hash_key, uint32_t idx)
-{
-	struct mail_hash_record *rec;
-	uint32_t hash_idx;
-
-	if (trans->hdr.hashed_count >= trans->next_grow_hashed_count)
-		mail_hash_resize(trans);
-
-	hash_idx = hash_key % trans->hdr.hash_size;
-
-	rec = mail_hash_idx(trans, idx);
-	rec->next_idx = trans->hash_base[hash_idx];
-	trans->hash_base[hash_idx] = idx;
-
-	mail_hash_update_hash_idx(trans, hash_idx);
-	trans->hdr.hashed_count++;
-}
-
-void mail_hash_insert(struct mail_hash_transaction *trans, const void *key,
-		      const void *value, uint32_t *idx_r)
-{
-	uint32_t hash_key;
-
-	mail_hash_insert_idx(trans, value, idx_r);
-	if (key == NULL)
-		hash_key = 0;
-	else {
-		hash_key = trans->hash->key_hash_cb(key);
-		mail_hash_insert_hash(trans, hash_key, *idx_r);
-	}
-	i_assert(trans->hash->rec_hash_cb(value) == hash_key);
-}
-
-void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	uoff_t offset;
-
-	i_assert(idx > 0 && idx < trans->hdr.record_count);
-
-	offset = mail_hash_idx_to_offset(trans, idx);
-	mail_hash_update_offset(trans, offset,
-				trans->hdr.record_size);
-}
-
-static void
-mail_hash_remove_idx(struct mail_hash_transaction *trans, uint32_t idx)
-{
-	struct mail_hash_record *rec;
-
-	if (idx+1 == trans->hdr.record_count) {
-		/* removing last record */
-		trans->hdr.record_count--;
-	} else {
-		/* mark the record expunged */
-		rec = mail_hash_idx(trans, idx);
-		rec->next_idx = (uint32_t)-1;
-		/* update the linked list of holes */
-		i_assert(trans->hdr.record_size >=
-			 sizeof(*rec) + sizeof(trans->hdr.first_hole_idx));
-		memcpy(rec+1, &trans->hdr.first_hole_idx,
-		       sizeof(trans->hdr.first_hole_idx));
-		trans->hdr.first_hole_idx = idx;
-
-		mail_hash_update(trans, idx);
-	}
-}
-
-void mail_hash_remove(struct mail_hash_transaction *trans,
-		      uint32_t idx, uint32_t key_hash)
-{
-	struct mail_hash_record *rec, *rec2;
-	unsigned int hash_idx;
-	uint32_t idx2;
-	int ret;
-
-	i_assert(idx > 0 && idx < trans->hdr.record_count);
-
-	if (key_hash == 0) {
-		/* key not in hash table */
-		mail_hash_remove_idx(trans, idx);
-		return;
-	}
-
-	rec = mail_hash_idx(trans, idx);
-
-	hash_idx = key_hash % trans->hdr.hash_size;
-	idx2 = trans->hash_base[hash_idx];
-	if (idx2 == idx) {
-		/* first in the hash table */
-		trans->hash_base[hash_idx] = rec->next_idx;
-		mail_hash_update_hash_idx(trans, hash_idx);
-	} else {
-		/* find the previous record */
-		struct mail_hash_iterate_context iter;
-
-		mail_hash_iterate_init(&iter, trans, idx2);
-		while ((ret = mail_hash_iterate_next(&iter, &idx2)) > 0) {
-			rec2 = mail_hash_idx(trans, idx2);
-			if (rec2->next_idx == idx)
-				break;
-		}
-		if (ret <= 0) {
-			if (ret == 0) {
-				mail_hash_set_corrupted(trans->hash,
-					"Tried to remove non-existing key");
-			}
-			return;
-		}
-
-		rec2->next_idx = rec->next_idx;
-		mail_hash_update_offset(trans, idx2, trans->hdr.record_size);
-
-		if (trans->hdr.hashed_count == 0) {
-			mail_hash_set_corrupted(trans->hash,
-						"Too low hashed_count");
-			return;
-		}
-	}
-
-	trans->hdr.hashed_count--;
-	mail_hash_remove_idx(trans, idx);
-}
--- a/src/lib-index/mail-hash.h	Mon Sep 01 15:11:54 2008 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-#ifndef MAIL_HASH_H
-#define MAIL_HASH_H
-
-#include "hash.h"
-
-#define MAIL_HASH_VERSION 1
-
-struct mail_index;
-struct mail_hash;
-struct mail_hash_transaction;
-
-/* File format:
-
-   [header]
-   [hash_size * (uint32_t idx)]
-   [record_count * hdr.record_size]
-
-   The indexes start from 1, so 0 means "doesn't exist".
-*/
-
-struct mail_hash_header {
-	/* File format version */
-	uint8_t version;
-	/* Corrupted flag is set while file is being modified. */
-	uint8_t corrupted;
-	uint8_t unused[3];
-
-	uint16_t base_header_size;
-	/* Full size of each mail_hash_record */
-	uint16_t record_size;
-	/* Full header size. Currently always the same as base_header_size. */
-	uint32_t header_size;
-	/* Number of records after the hash table, includes [0] and holes */
-	uint32_t record_count;
-	/* Number of messages with non-zero hash */
-	uint32_t hashed_count;
-
-	/* Holes is a linked list of unused records. 0 = no holes. */
-	uint32_t first_hole_idx;
-	/* Hash size as number of elements */
-	uint32_t hash_size;
-
-	/* UID validity. */
-	uint32_t uid_validity;
-	/* The last UID which has been added to this file (but may have been
-	   expunged already) */
-	uint32_t last_uid;
-	/* Number of message records (records with non-zero UID) */
-	uint32_t message_count;
-	/* Increased every time the hash is reset */
-	uint32_t reset_counter;
-};
-
-struct mail_hash_record {
-	/* Linked list of records for hash collisions.
-	   (uint32_t)-1 means this record is deleted */
-	uint32_t next_idx;
-	/* user_data[] */
-};
-ARRAY_DEFINE_TYPE(mail_hash_record, struct mail_hash_record);
-#define MAIL_HASH_RECORD_IS_DELETED(rec) \
-	((rec)->next_idx == (uint32_t)-1)
-
-enum mail_hash_lock_flags {
-	MAIL_HASH_LOCK_FLAG_TRY			= 0x01,
-	MAIL_HASH_LOCK_FLAG_CREATE_MISSING	= 0x02
-};
-
-/* Returns 0 if the pointers are equal. */
-typedef bool mail_hash_ctx_cmp_callback_t(struct mail_hash_transaction *trans,
-					  const void *key, uint32_t idx,
-					  void *context);
-/* map[] contains old -> new index mapping. */
-typedef int mail_hash_remap_callback_t(struct mail_hash_transaction *trans,
-				       const uint32_t *map,
-				       unsigned int map_size, void *context);
-
-/* suffix=NULL keeps the has only in memory */
-struct mail_hash *
-mail_hash_alloc(struct mail_index *index, const char *suffix,
-		unsigned int record_size,
-		hash_callback_t *key_hash_cb,
-		hash_callback_t *rec_hash_cb,
-		mail_hash_ctx_cmp_callback_t *key_compare_cb,
-		mail_hash_remap_callback_t *remap_callback,
-		void *context);
-void mail_hash_free(struct mail_hash **hash);
-
-/* Returns 1 if created and locked, 0 if already exists, -1 if error. */
-int mail_hash_create_excl_locked(struct mail_hash *hash);
-
-/* Lock the file. Returns 1 if locking was successful, 0 if file doesn't exist,
-   -1 if error. */
-int mail_hash_lock_shared(struct mail_hash *hash);
-/* If FLAG_TRY_LOCK is set and file is already locked, return 0.
-   Otherwise return values are identical with mail_hash_lock_shared() */
-int mail_hash_lock_exclusive(struct mail_hash *hash,
-			     enum mail_hash_lock_flags flags);
-void mail_hash_unlock(struct mail_hash *hash);
-/* Returns the current locking state (F_UNLCK, F_RDLCK, F_WRLCK) */
-int mail_hash_get_lock_type(struct mail_hash *hash);
-
-struct mail_hash_transaction *
-mail_hash_transaction_begin(struct mail_hash *hash, unsigned int min_hash_size);
-int mail_hash_transaction_write(struct mail_hash_transaction *trans);
-void mail_hash_transaction_end(struct mail_hash_transaction **trans);
-/* Returns TRUE if transaction is in broken state because of an earlier
-   I/O error or detected file corruption. */
-bool mail_hash_transaction_is_broken(struct mail_hash_transaction *trans);
-/* Returns TRUE if hash is currently being updated in memory. */
-bool mail_hash_transaction_is_in_memory(struct mail_hash_transaction *trans);
-
-/* Returns the hash structure of the transaction. */
-struct mail_hash *
-mail_hash_transaction_get_hash(struct mail_hash_transaction *trans);
-
-/* Clear the entire hash file's contents. */
-void mail_hash_reset(struct mail_hash_transaction *trans);
-/* Read the entire file to memory. */
-int mail_hash_map_file(struct mail_hash_transaction *trans);
-
-/* Returns a modifiable hash header. */
-struct mail_hash_header *
-mail_hash_get_header(struct mail_hash_transaction *trans);
-
-/* Look up key from hash and return its value or NULL if key isn't in the hash.
-   NULL is also returned if the lookup fails because of I/O error or file
-   corruption. */
-void *mail_hash_lookup(struct mail_hash_transaction *trans, const void *key,
-		       uint32_t *idx_r);
-/* Never returns NULL. If the lookup fails the transaction is marked broken
-   and a pointer to a dummy record is returned. */
-void *mail_hash_lookup_idx(struct mail_hash_transaction *trans, uint32_t idx);
-
-/* If key=NULL, it's only added as a record after the hash table and not
-   added to the actual hash table. Note that hash=0 equals to key=NULL insert,
-   so a valid hash value must never be 0. */
-void mail_hash_insert(struct mail_hash_transaction *trans, const void *key,
-		      const void *value, uint32_t *idx_r);
-/* Mark the record at given index as modified. */
-void mail_hash_update(struct mail_hash_transaction *trans, uint32_t idx);
-/* idx must be provided. key_hash must be provided if the record was added
-   with non-NULL key. */
-void mail_hash_remove(struct mail_hash_transaction *trans,
-		      uint32_t idx, uint32_t key_hash);
-
-void mail_hash_set_corrupted(struct mail_hash *hash, const char *error);
-void mail_hash_transaction_set_corrupted(struct mail_hash_transaction *trans,
-					 const char *error);
-
-#endif
--- a/src/lib-index/mail-index-private.h	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-index/mail-index-private.h	Mon Sep 01 15:17:00 2008 +0300
@@ -332,4 +332,9 @@
 uint32_t mail_index_uint32_to_offset(uint32_t offset);
 uint32_t mail_index_offset_to_uint32(uint32_t offset);
 
+#define MAIL_INDEX_PACK_MAX_SIZE ((sizeof(uint32_t) * 8 + 7) / 7)
+void mail_index_pack_num(uint8_t **p, uint32_t num);
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+			  uint32_t *num_r);
+
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-index/mail-index-strmap.c	Mon Sep 01 15:17:00 2008 +0300
@@ -0,0 +1,1224 @@
+/* Copyright (c) 2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "array.h"
+#include "bsearch-insert-pos.h"
+#include "istream.h"
+#include "ostream.h"
+#include "file-lock.h"
+#include "file-dotlock.h"
+#include "crc32.h"
+#include "safe-mkstemp.h"
+#include "str.h"
+#include "mail-index-private.h"
+#include "mail-index-strmap.h"
+
+#include <stdio.h>
+
+struct mail_index_strmap {
+	struct mail_index *index;
+	char *path;
+	int fd;
+	struct istream *input;
+
+	struct file_lock *file_lock;
+	struct dotlock *dotlock;
+	struct dotlock_settings dotlock_settings;
+};
+
+struct mail_index_strmap_view {
+	struct mail_index_strmap *strmap;
+	struct mail_index_view *view;
+
+	ARRAY_TYPE(mail_index_strmap_rec) recs;
+	ARRAY_DEFINE(recs_crc32, uint32_t);
+	struct hash2_table *hash;
+
+	mail_index_strmap_key_cmp_t *key_compare;
+	mail_index_strmap_rec_cmp_t *rec_compare;
+	mail_index_strmap_remap_t *remap_cb;
+	void *cb_context;
+
+	uoff_t last_read_block_offset;
+	uint32_t last_read_uid;
+	uint32_t last_added_uid;
+	uint32_t total_ref_count;
+
+	uint32_t last_ref_index;
+	uint32_t next_str_idx;
+	uint32_t lost_expunged_uid;
+
+	unsigned int desynced:1;
+};
+
+struct mail_index_strmap_read_context {
+	struct mail_index_strmap_view *view;
+
+	struct istream *input;
+	uoff_t end_offset;
+	uint32_t highest_str_idx;
+	uint32_t uid_lookup_idx;
+	uint32_t lost_expunged_uid;
+
+	const unsigned char *data, *end, *str_idx_base;
+	struct mail_index_strmap_rec rec;
+	uint32_t next_ref_index;
+	unsigned int rec_size;
+
+	unsigned int too_large_uids:1;
+};
+
+struct mail_index_strmap_view_sync {
+	struct mail_index_strmap_view *view;
+};
+
+struct mail_index_strmap_hash_key {
+	const char *str;
+	uint32_t crc32;
+};
+
+/* number of bytes required to store one string idx */
+#define STRMAP_FILE_STRIDX_SIZE (sizeof(uint32_t)*2)
+
+/* renumber the string indexes when highest string idx becomes larger than
+   <number of indexes>*STRMAP_FILE_MAX_STRIDX_MULTIPLIER */
+#define STRMAP_FILE_MAX_STRIDX_MULTIPLIER 2
+
+#define STRIDX_MUST_RENUMBER(highest_idx, n_unique_indexes) \
+	(highest_idx > n_unique_indexes * STRMAP_FILE_MAX_STRIDX_MULTIPLIER)
+
+#define MAIL_INDEX_STRMAP_TIMEOUT_SECS 10
+
+const struct dotlock_settings default_dotlock_settings = {
+	MEMBER(temp_prefix) NULL,
+	MEMBER(lock_suffix) NULL,
+
+	MEMBER(timeout) MAIL_INDEX_STRMAP_TIMEOUT_SECS,
+	MEMBER(stale_timeout) 30
+};
+
+struct mail_index_strmap *
+mail_index_strmap_init(struct mail_index *index, const char *suffix)
+{
+	struct mail_index_strmap *strmap;
+
+	strmap = i_new(struct mail_index_strmap, 1);
+	strmap->index = index;
+	strmap->path = i_strconcat(index->filepath, suffix, NULL);
+	strmap->fd = -1;
+
+	strmap->dotlock_settings = default_dotlock_settings;
+	strmap->dotlock_settings.use_excl_lock = index->use_excl_dotlocks;
+	strmap->dotlock_settings.nfs_flush = index->nfs_flush;
+	return strmap;
+}
+
+static bool
+mail_index_strmap_read_rec_next(struct mail_index_strmap_read_context *ctx,
+				uint32_t *crc32_r);
+
+static void
+mail_index_strmap_set_syscall_error(struct mail_index_strmap *strmap,
+				    const char *function)
+{
+	i_assert(function != NULL);
+
+	if (ENOSPACE(errno)) {
+		strmap->index->nodiskspace = TRUE;
+		return;
+	}
+
+	mail_index_set_error(strmap->index,
+			     "%s failed with strmap index file %s: %m",
+			     function, strmap->path);
+}
+
+static void mail_index_strmap_close(struct mail_index_strmap *strmap)
+{
+	if (strmap->file_lock != NULL)
+		file_lock_free(&strmap->file_lock);
+	else if (strmap->dotlock != NULL)
+		file_dotlock_delete(&strmap->dotlock);
+
+	if (strmap->fd != -1) {
+		if (close(strmap->fd) < 0)
+			mail_index_strmap_set_syscall_error(strmap, "close()");
+		strmap->fd = -1;
+	}
+	if (strmap->input != NULL)
+		i_stream_unref(&strmap->input);
+}
+
+void mail_index_strmap_deinit(struct mail_index_strmap **_strmap)
+{
+	struct mail_index_strmap *strmap = *_strmap;
+
+	*_strmap = NULL;
+	mail_index_strmap_close(strmap);
+	i_free(strmap->path);
+	i_free(strmap);
+}
+
+static unsigned int mail_index_strmap_hash_key(const void *_key)
+{
+	const struct mail_index_strmap_hash_key *key = _key;
+
+	return key->crc32;
+}
+
+static bool
+mail_index_strmap_hash_cmp(const void *_key, const void *_value, void *context)
+{
+	const struct mail_index_strmap_hash_key *key = _key;
+	const struct mail_index_strmap_rec *rec = _value;
+	struct mail_index_strmap_view *view = context;
+
+	return view->key_compare(key->str, rec, view->cb_context);
+}
+
+struct mail_index_strmap_view *
+mail_index_strmap_view_open(struct mail_index_strmap *strmap,
+			    struct mail_index_view *idx_view,
+			    mail_index_strmap_key_cmp_t *key_compare_cb,
+			    mail_index_strmap_rec_cmp_t *rec_compare_cb,
+			    mail_index_strmap_remap_t *remap_cb,
+			    void *context,
+			    const ARRAY_TYPE(mail_index_strmap_rec) **recs_r,
+			    const struct hash2_table **hash_r)
+{
+	struct mail_index_strmap_view *view;
+
+	view = i_new(struct mail_index_strmap_view, 1);
+	view->strmap = strmap;
+	view->view = idx_view;
+	view->key_compare = key_compare_cb;
+	view->rec_compare = rec_compare_cb;
+	view->remap_cb = remap_cb;
+	view->cb_context = context;
+	view->next_str_idx = 1;
+
+	i_array_init(&view->recs, 64);
+	i_array_init(&view->recs_crc32, 64);
+	view->hash = hash2_create(0, sizeof(struct mail_index_strmap_rec),
+				  mail_index_strmap_hash_key,
+				  mail_index_strmap_hash_cmp, view);
+	*recs_r = &view->recs;
+	*hash_r = view->hash;
+	return view;
+}
+
+void mail_index_strmap_view_close(struct mail_index_strmap_view **_view)
+{
+	struct mail_index_strmap_view *view = *_view;
+
+	*_view = NULL;
+	array_free(&view->recs);
+	array_free(&view->recs_crc32);
+	hash2_destroy(&view->hash);
+	i_free(view);
+}
+
+uint32_t mail_index_strmap_view_get_highest_idx(struct mail_index_strmap_view *view)
+{
+	return view->next_str_idx-1;
+}
+
+static void mail_index_strmap_view_reset(struct mail_index_strmap_view *view)
+{
+	view->remap_cb(NULL, 0, 0, view->cb_context);
+	array_clear(&view->recs);
+	array_clear(&view->recs_crc32);
+	hash2_clear(view->hash);
+
+	view->last_added_uid = 0;
+	view->lost_expunged_uid = 0;
+	view->desynced = FALSE;
+}
+
+static void
+mail_index_strmap_view_set_corrupted(struct mail_index_strmap_view *view)
+{
+	mail_index_set_error(view->strmap->index,
+			     "Corrupted strmap index file: %s",
+			     view->strmap->path);
+	(void)unlink(view->strmap->path);
+	mail_index_strmap_close(view->strmap);
+	mail_index_strmap_view_reset(view);
+}
+
+static int mail_index_strmap_open(struct mail_index_strmap_view *view)
+{
+	struct mail_index_strmap *strmap = view->strmap;
+	const struct mail_index_header *idx_hdr;
+	struct mail_index_strmap_header hdr;
+	const unsigned char *data;
+	size_t size;
+	int ret;
+
+	i_assert(strmap->fd == -1);
+
+	strmap->fd = open(strmap->path, O_RDWR);
+	if (strmap->fd == -1) {
+		if (errno == ENOENT)
+			return 0;
+		mail_index_strmap_set_syscall_error(strmap, "open()");
+		return -1;
+	}
+	strmap->input = i_stream_create_fd(strmap->fd, (size_t)-1, FALSE);
+	ret = i_stream_read_data(strmap->input, &data, &size, sizeof(hdr)-1);
+	if (ret <= 0) {
+		if (ret < 0) {
+			mail_index_strmap_set_syscall_error(strmap, "read()");
+			mail_index_strmap_close(strmap);
+		} else {
+			i_assert(ret == 0);
+			mail_index_strmap_view_set_corrupted(view);
+		}
+		return ret;
+	}
+	memcpy(&hdr, data, sizeof(hdr));
+
+	idx_hdr = mail_index_get_header(view->view);
+	if (hdr.version != MAIL_INDEX_STRMAP_VERSION ||
+	    hdr.uid_validity != idx_hdr->uid_validity) {
+		/* need to rebuild. if we already had something in the strmap,
+		   we can keep it. */
+		(void)unlink(strmap->path);
+		mail_index_strmap_close(strmap);
+		return 0;
+	}
+
+	/* we'll read the entire file from the beginning */
+	view->last_added_uid = 0;
+	view->last_read_uid = 0;
+	view->total_ref_count = 0;
+	view->last_read_block_offset = sizeof(struct mail_index_strmap_header);
+	view->next_str_idx = 1;
+
+	mail_index_strmap_view_reset(view);
+	return 0;
+}
+
+static bool mail_index_strmap_need_reopen(struct mail_index_strmap *strmap)
+{
+	struct stat st1, st2;
+
+	/* FIXME: nfs flush */
+	if (fstat(strmap->fd, &st1) < 0) {
+		if (errno != ESTALE)
+			mail_index_strmap_set_syscall_error(strmap, "fstat()");
+		return TRUE;
+	}
+	if (stat(strmap->path, &st2) < 0) {
+		mail_index_strmap_set_syscall_error(strmap, "stat()");
+		return TRUE;
+	}
+	return st1.st_ino != st2.st_ino || !CMP_DEV_T(st1.st_dev, st2.st_dev);
+}
+
+static int mail_index_strmap_refresh(struct mail_index_strmap_view *view)
+{
+	uint32_t seq;
+
+	if (MAIL_INDEX_IS_IN_MEMORY(view->strmap->index))
+		return -1;
+
+	if (view->strmap->fd != -1) {
+		if (!mail_index_strmap_need_reopen(view->strmap)) {
+			if (view->lost_expunged_uid != 0) {
+				/* last read failed because view had a message
+				   that didn't exist in the strmap (because it
+				   was expunged by another session). if the
+				   message still isn't expunged in this view,
+				   just continue using the current strmap. */
+				if (mail_index_lookup_seq(view->view,
+						view->lost_expunged_uid, &seq))
+					return -1;
+			} else if (view->desynced) {
+				/* our view isn't synced with the disk, we
+				   can't read strmap without first resetting
+				   the view */
+			} else {
+				i_stream_sync(view->strmap->input);
+				return 0;
+			}
+		}
+		mail_index_strmap_close(view->strmap);
+	}
+
+	return mail_index_strmap_open(view);
+}
+
+static int
+mail_index_strmap_read_packed(struct mail_index_strmap_read_context *ctx,
+			      uint32_t *num_r)
+{
+	const unsigned char *data;
+	const uint8_t *bytes, *p, *end;
+	size_t size;
+	int ret;
+
+	ret = i_stream_read_data(ctx->input, &data, &size, sizeof(*num_r) - 1);
+	if (ret <= 0)
+		return ret;
+
+	if (ctx->input->v_offset + size > ctx->end_offset)
+		size = ctx->end_offset - ctx->input->v_offset;
+	bytes = p = (const uint8_t *)data;
+	end = bytes + size;
+
+	if (mail_index_unpack_num(&p, end, num_r) <  0)
+		return -1;
+	i_stream_skip(ctx->input, p - bytes);
+	return 1;
+}
+
+static int
+mail_index_strmap_uid_exists(struct mail_index_strmap_read_context *ctx,
+			     uint32_t uid)
+{
+	const struct mail_index_record *rec;
+
+	if (ctx->uid_lookup_idx >= ctx->view->view->map->hdr.messages_count) {
+		if (uid >= ctx->view->view->map->hdr.next_uid) {
+			/* thread index has larger UIDs than what we've seen
+			   in our view. we'll have to read them again later
+			   when we know about them */
+			ctx->too_large_uids = TRUE;
+		}
+		return 0;
+	}
+
+	rec = MAIL_INDEX_MAP_IDX(ctx->view->view->map, ctx->uid_lookup_idx);
+	if (rec->uid == uid) {
+		ctx->uid_lookup_idx++;
+		return 1;
+	} else if (rec->uid > uid) {
+		return 0;
+	} else {
+		/* record that exists in index is missing from strmap.
+		   see if it's because the strmap is corrupted or because
+		   our current view is a bit stale and the message has already
+		   been expunged. */
+		(void)mail_index_refresh(ctx->view->view->index);
+		if (mail_index_is_expunged(ctx->view->view,
+					   ctx->uid_lookup_idx + 1))
+			ctx->lost_expunged_uid = rec->uid;
+		return -1;
+	}
+}
+
+static int
+mail_index_strmap_read_rec_first(struct mail_index_strmap_read_context *ctx,
+				 uint32_t *crc32_r)
+{
+	size_t size;
+	uint32_t n, i, count, str_idx;
+	int ret;
+
+	/* <uid> <n> <crc32>*count <str_idx>*count
+	   where
+	     n = 0 -> count=1 (only Message-ID:)
+	     n = 1 -> count=2 (Message-ID: + In-Reply-To:)
+	     n = 2+ -> count=n (Message-ID: + References:)
+	*/
+	if (mail_index_strmap_read_packed(ctx, &n) <= 0)
+		return -1;
+	count = n < 2 ? n + 1 : n;
+	ctx->view->total_ref_count += count;
+
+	ctx->rec_size = count * (sizeof(ctx->rec.str_idx) + sizeof(*crc32_r));
+	ret = mail_index_strmap_uid_exists(ctx, ctx->rec.uid);
+	if (ret < 0)
+		return -1;
+	if (i_stream_read_data(ctx->view->strmap->input, &ctx->data, &size,
+			       ctx->rec_size - 1) <= 0)
+		return -1;
+	ctx->str_idx_base = ctx->data + count * sizeof(uint32_t);
+
+	if (ret == 0) {
+		/* this message has already been expunged, ignore it.
+		   update highest string indexes anyway. */
+		for (i = 0; i < count; i++) {
+			memcpy(&str_idx, ctx->str_idx_base, sizeof(str_idx));
+			if (ctx->highest_str_idx < str_idx)
+				ctx->highest_str_idx = str_idx;
+			ctx->str_idx_base += sizeof(str_idx);
+		}
+		i_stream_skip(ctx->view->strmap->input, ctx->rec_size);
+		return 0;
+	}
+
+	/* everything exists. save it. FIXME: these ref_index values
+	   are thread index specific, perhaps something more generic
+	   should be used some day */
+	ctx->end = ctx->data + count * sizeof(*crc32_r);
+
+	ctx->next_ref_index = 0;
+	if (!mail_index_strmap_read_rec_next(ctx, crc32_r))
+		i_unreached();
+	ctx->next_ref_index = n == 1 ? 1 : 2;
+	return 1;
+}
+
+static bool
+mail_index_strmap_read_rec_next(struct mail_index_strmap_read_context *ctx,
+				uint32_t *crc32_r)
+{
+	if (ctx->data == ctx->end) {
+		i_stream_skip(ctx->view->strmap->input, ctx->rec_size);
+		return FALSE;
+	}
+
+	/* FIXME: str_idx could be stored as packed relative values
+	   (first relative to highest_idx, the rest relative to the
+	   previous str_idx) */
+
+	/* read the record contents */
+	memcpy(&ctx->rec.str_idx, ctx->str_idx_base, sizeof(ctx->rec.str_idx));
+	memcpy(crc32_r, ctx->data, sizeof(*crc32_r));
+
+	ctx->rec.ref_index = ctx->next_ref_index++;
+
+	if (ctx->highest_str_idx < ctx->rec.str_idx)
+		ctx->highest_str_idx = ctx->rec.str_idx;
+
+	/* get to the next record */
+	ctx->data += sizeof(*crc32_r);
+	ctx->str_idx_base += sizeof(ctx->rec.str_idx);
+	return TRUE;
+}
+
+static int
+strmap_read_block_init(struct mail_index_strmap_view *view,
+		       struct mail_index_strmap_read_context *ctx)
+{
+	struct mail_index_strmap *strmap = view->strmap;
+	const unsigned char *data;
+	size_t size;
+	uint32_t block_size, seq1, seq2;
+	int ret;
+
+	if (view->last_read_uid + 1 >= view->view->map->hdr.next_uid) {
+		/* come back later when we know about the new UIDs */
+		return 0;
+	}
+
+	memset(ctx, 0, sizeof(*ctx));
+	ret = i_stream_read_data(strmap->input, &data, &size,
+				 sizeof(block_size)-1);
+	if (ret <= 0) {
+		if (strmap->input->stream_errno == 0) {
+			/* no new data */
+			return 0;
+		}
+		mail_index_strmap_set_syscall_error(strmap, "read()");
+		return -1;
+	}
+	memcpy(&block_size, data, sizeof(block_size));
+	block_size = mail_index_offset_to_uint32(block_size) >> 2;
+	if (block_size == 0) {
+		/* the rest of the file is either not written, or the previous
+		   write didn't finish */
+		return 0;
+	}
+	i_stream_skip(strmap->input, sizeof(block_size));
+
+	ctx->view = view;
+	ctx->input = strmap->input;
+	ctx->end_offset = strmap->input->v_offset + block_size;
+	if (ctx->end_offset < strmap->input->v_offset) {
+		/* block size too large */
+		mail_index_strmap_view_set_corrupted(view);
+		return -1;
+	}
+	ctx->rec.uid = view->last_read_uid + 1;
+
+	/* FIXME: when reading multiple blocks we shouldn't have to calculate
+	   this every time */
+	if (!mail_index_lookup_seq_range(view->view, ctx->rec.uid, (uint32_t)-1,
+					 &seq1, &seq2))
+		seq1 = mail_index_view_get_messages_count(view->view) + 1;
+	ctx->uid_lookup_idx = seq1 - 1;
+	return 1;
+}
+
+static int
+strmap_read_block_next(struct mail_index_strmap_read_context *ctx,
+		       uint32_t *crc32_r)
+{
+	uint32_t uid_diff;
+	int ret;
+
+	if (mail_index_strmap_read_rec_next(ctx, crc32_r))
+		return 1;
+
+	/* get next UID */
+	do {
+		if (ctx->input->v_offset == ctx->end_offset) {
+			/* this block is done */
+			return 0;
+		}
+		if (mail_index_strmap_read_packed(ctx, &uid_diff) < 0)
+			return -1;
+
+		ctx->rec.uid += uid_diff;
+		ret = mail_index_strmap_read_rec_first(ctx, crc32_r);
+	} while (ret == 0);
+	return ret;
+}
+
+static int
+strmap_read_block_deinit(struct mail_index_strmap_read_context *ctx, int ret,
+			 bool update_block_offset)
+{
+	struct mail_index_strmap_view *view = ctx->view;
+	struct mail_index_strmap *strmap = view->strmap;
+
+	if (ctx->highest_str_idx > view->total_ref_count) {
+		/* if all string indexes are unique, highest_str_index equals
+		   total_ref_count. otherwise it's always lower. */
+		mail_index_set_error(strmap->index,
+				     "Corrupted strmap index file %s: "
+				     "String indexes too high "
+				     "(highest=%u max=%u)",
+				     strmap->path, ctx->highest_str_idx,
+				     view->total_ref_count);
+		mail_index_strmap_view_set_corrupted(view);
+		return -1;
+	}
+	if (ctx->lost_expunged_uid != 0) {
+		/* our view contained a message that had since been expunged. */
+		i_assert(ret < 0);
+		view->lost_expunged_uid = ctx->lost_expunged_uid;
+	} else if (ret < 0) {
+		if (strmap->input->stream_errno != 0)
+			mail_index_strmap_set_syscall_error(strmap, "read()");
+		else
+			mail_index_strmap_view_set_corrupted(view);
+		return -1;
+	} else if (update_block_offset && !ctx->too_large_uids) {
+		view->last_read_block_offset = strmap->input->v_offset;
+		view->last_read_uid = ctx->rec.uid;
+	}
+	if (view->next_str_idx <= ctx->highest_str_idx)
+		view->next_str_idx = ctx->highest_str_idx + 1;
+	return ret;
+}
+
+static bool
+strmap_view_sync_handle_conflict(struct mail_index_strmap_read_context *ctx,
+				 const struct mail_index_strmap_rec *hash_rec,
+				 struct hash2_iter *iter)
+{
+	uint32_t seq;
+
+	/* hopefully it's a message that has since been expunged */
+	if (!mail_index_lookup_seq(ctx->view->view, hash_rec->uid, &seq)) {
+		/* message is no longer in our view. remove it completely. */
+		hash2_remove_iter(ctx->view->hash, iter);
+		return TRUE;
+	}
+	if (mail_index_is_expunged(ctx->view->view, seq)) {
+		/* it's quite likely a conflict. we may not be able to verify
+		   it, so just assume it is. nothing breaks even if we guess
+		   wrong, the performance just suffers a bit. */
+		return FALSE;
+	}
+
+	/* 0 means "doesn't match", which is the only acceptable case */
+	return ctx->view->rec_compare(&ctx->rec, hash_rec,
+				      ctx->view->cb_context) == 0;
+}
+
+static int
+mail_index_strmap_view_sync_block(struct mail_index_strmap_read_context *ctx)
+{
+	struct mail_index_strmap_rec *hash_rec;
+	struct hash2_iter iter;
+	uint32_t crc32, prev_uid = 0;
+	int ret;
+
+	while ((ret = strmap_read_block_next(ctx, &crc32)) > 0) {
+		if (ctx->rec.uid <= ctx->view->last_added_uid) {
+			if (ctx->rec.uid < ctx->view->last_added_uid ||
+			    prev_uid != ctx->rec.uid) {
+				/* we've already added this */
+				continue;
+			}
+		}
+		prev_uid = ctx->rec.uid;
+
+		/* check for conflicting string indexes. they may happen if
+
+		   1) msgid exists only for a message X that has been expunged
+		   2) another process doesn't see X, but sees msgid for another
+		      message and writes it using a new string index
+		   3) if we still see X, we now see the same msgid with two
+		      string indexes.
+
+		   if we detect such a conflict, we can't continue using the
+		   strmap index until X has been expunged. */
+		memset(&iter, 0, sizeof(iter));
+		while ((hash_rec = hash2_iterate(ctx->view->hash,
+						 crc32, &iter)) != NULL &&
+		       hash_rec->str_idx != ctx->rec.str_idx) {
+			/* CRC32 matches, but string index doesn't */
+			if (!strmap_view_sync_handle_conflict(ctx, hash_rec,
+							      &iter)) {
+				ctx->lost_expunged_uid = hash_rec->uid;
+				ret = -1;
+				goto error;
+			}
+		}
+
+		ctx->view->last_added_uid = ctx->rec.uid;
+
+		/* add the record to records array */
+		array_append(&ctx->view->recs, &ctx->rec, 1);
+		array_append(&ctx->view->recs_crc32, &crc32, 1);
+
+		/* add a separate copy of the record to hash */
+		hash_rec = hash2_insert_hash(ctx->view->hash, crc32);
+		memcpy(hash_rec, &ctx->rec, sizeof(*hash_rec));
+	}
+error:
+	return strmap_read_block_deinit(ctx, ret, TRUE);
+}
+
+struct mail_index_strmap_view_sync *
+mail_index_strmap_view_sync_init(struct mail_index_strmap_view *view,
+				 uint32_t *last_uid_r)
+{
+	struct mail_index_strmap_view_sync *sync;
+	struct mail_index_strmap_read_context ctx;
+	int ret;
+
+	sync = i_new(struct mail_index_strmap_view_sync, 1);
+	sync->view = view;
+
+	if (mail_index_strmap_refresh(view) < 0) {
+		/* reading the strmap failed - just ignore and do
+		   this in-memory based on whatever we knew last */
+	} else if (view->strmap->input != NULL) {
+		i_stream_seek(view->strmap->input,
+			      view->last_read_block_offset);
+		while ((ret = strmap_read_block_init(view, &ctx)) > 0) {
+			if (mail_index_strmap_view_sync_block(&ctx) < 0) {
+				ret = -1;
+				break;
+			}
+			if (ctx.too_large_uids)
+				break;
+		}
+
+		if (ret < 0) {
+			/* something failed - we can still use the strmap as far
+			   as we managed to read it, but our view is now out
+			   of sync */
+			view->desynced = TRUE;
+		} else {
+			i_assert(view->lost_expunged_uid == 0);
+		}
+	}
+	*last_uid_r = view->last_added_uid;
+	return sync;
+}
+
+static inline uint32_t crc32_str_nonzero(const char *str)
+{
+	uint32_t value = crc32_str(str);
+	return value == 0 ? 1 : value;
+}
+
+void mail_index_strmap_view_sync_add(struct mail_index_strmap_view_sync *sync,
+				     uint32_t uid, uint32_t ref_index,
+				     const char *key)
+{
+	struct mail_index_strmap_view *view = sync->view;
+	struct mail_index_strmap_rec *rec, *old_rec;
+	struct mail_index_strmap_hash_key hash_key;
+	uint32_t str_idx;
+
+	i_assert(uid > view->last_added_uid ||
+		 (uid == view->last_added_uid &&
+		  ref_index > view->last_ref_index));
+
+	hash_key.str = key;
+	hash_key.crc32 = crc32_str_nonzero(key);
+
+	old_rec = hash2_lookup(view->hash, &hash_key);
+	if (old_rec != NULL) {
+		/* The string already exists, use the same unique idx */
+		str_idx = old_rec->str_idx;
+	} else {
+		/* Newly seen string, assign a new unique idx to it */
+		str_idx = view->next_str_idx++;
+	}
+	i_assert(str_idx != 0);
+
+	rec = hash2_insert(view->hash, &hash_key);
+	rec->uid = uid;
+	rec->ref_index = ref_index;
+	rec->str_idx = str_idx;
+	array_append(&view->recs, rec, 1);
+	array_append(&view->recs_crc32, &hash_key.crc32, 1);
+
+	view->last_added_uid = uid;
+	view->last_ref_index = ref_index;
+}
+
+void mail_index_strmap_view_sync_add_unique(struct mail_index_strmap_view_sync *sync,
+					    uint32_t uid, uint32_t ref_index)
+{
+	struct mail_index_strmap_view *view = sync->view;
+	struct mail_index_strmap_rec rec;
+
+	i_assert(uid > view->last_added_uid ||
+		 (uid == view->last_added_uid &&
+		  ref_index > view->last_ref_index));
+
+	memset(&rec, 0, sizeof(rec));
+	rec.uid = uid;
+	rec.ref_index = ref_index;
+	rec.str_idx = view->next_str_idx++;
+	array_append(&view->recs, &rec, 1);
+	(void)array_append_space(&view->recs_crc32);
+
+	view->last_added_uid = uid;
+	view->last_ref_index = ref_index;
+}
+
+static void mail_index_strmap_view_renumber(struct mail_index_strmap_view *view)
+{
+	struct mail_index_strmap_read_context ctx;
+	struct mail_index_strmap_rec *recs, *hash_rec;
+	uint32_t prev_uid, str_idx, *recs_crc32, *renumber_map;
+	unsigned int i, dest, count, count2;
+	int ret;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.view = view;
+
+	/* create a map of old -> new index and remove records of
+	   expunged messages */
+	renumber_map = i_new(uint32_t, view->next_str_idx);
+	str_idx = 0; prev_uid = 0;
+	recs = array_get_modifiable(&view->recs, &count);
+	recs_crc32 = array_get_modifiable(&view->recs_crc32, &count2);
+	i_assert(count == count2);
+
+	for (i = dest = 0; i < count; ) {
+		if (prev_uid != recs[i].uid) {
+			/* see if this record should be removed */
+			prev_uid = recs[i].uid;
+			ret = mail_index_strmap_uid_exists(&ctx, prev_uid);
+			i_assert(ret >= 0);
+			if (ret == 0) {
+				/* message expunged */
+				do {
+					i++;
+				} while (i < count && recs[i].uid == prev_uid);
+				continue;
+			}
+		}
+
+		i_assert(recs[i].str_idx < view->next_str_idx);
+		if (renumber_map[recs[i].str_idx] == 0)
+			renumber_map[recs[i].str_idx] = ++str_idx;
+		if (i != dest) {
+			recs[dest] = recs[i];
+			recs_crc32[dest] = recs_crc32[i];
+		}
+		i++; dest++;
+	}
+	i_assert(renumber_map[0] == 0);
+	array_delete(&view->recs, dest, i-dest);
+	array_delete(&view->recs_crc32, dest, i-dest);
+
+	/* notify caller of the renumbering */
+	i_assert(str_idx <= view->next_str_idx);
+	view->remap_cb(renumber_map, view->next_str_idx, str_idx + 1,
+		       view->cb_context);
+
+	/* renumber the indexes in-place and recreate the hash */
+	recs = array_get_modifiable(&view->recs, &count);
+	hash2_clear(view->hash);
+	for (i = 0; i < count; i++) {
+		recs[i].str_idx = renumber_map[recs[i].str_idx];
+		hash_rec = hash2_insert_hash(view->hash, recs_crc32[i]);
+		memcpy(hash_rec, &recs[i], sizeof(*hash_rec));
+	}
+
+	/* update the new next_str_idx only after remapping */
+	view->next_str_idx = str_idx + 1;
+	i_free(renumber_map);
+}
+
+static int mail_index_strmap_write_block(struct mail_index_strmap_view *view,
+					 struct ostream *output,
+					 unsigned int i, uint32_t base_uid)
+{
+	const struct mail_index_strmap_rec *recs;
+	const uint32_t *crc32;
+	unsigned int j, n, count, count2, uid_rec_count;
+	uint32_t block_size;
+	uint8_t *p, packed[MAIL_INDEX_PACK_MAX_SIZE*2];
+	uoff_t block_offset, end_offset;
+
+	/* skip over the block size for now, we don't know it yet */
+	block_offset = output->offset;
+	block_size = 0;
+	o_stream_send(output, &block_size, sizeof(block_size));
+
+	/* write records */
+	recs = array_get(&view->recs, &count);
+	crc32 = array_get(&view->recs_crc32, &count2);
+	i_assert(count == count2);
+	while (i < count) {
+		/* @UNSAFE: <uid diff> */
+		p = packed;
+		mail_index_pack_num(&p, recs[i].uid - base_uid);
+		base_uid = recs[i].uid;
+
+		/* find how many records belong to this UID */
+		uid_rec_count = 1;
+		for (j = i + 1; j < count; j++) {
+			if (recs[j].uid != base_uid)
+				break;
+			uid_rec_count++;
+		}
+		view->total_ref_count += uid_rec_count;
+
+		/* <n> <crc32>*count <str_idx>*count -
+		   FIXME: thread index specific code */
+		i_assert(recs[i].ref_index == 0);
+		if (uid_rec_count == 1) {
+			/* Only Message-ID: header */
+			n = 0;
+		} else if (recs[i+1].ref_index == 1) {
+			/* In-Reply-To: header */
+			n = 1;
+			i_assert(uid_rec_count == 2);
+		} else {
+			/* References: header */
+			n = uid_rec_count;
+			i_assert(recs[i+1].ref_index == 2);
+		}
+
+		mail_index_pack_num(&p, n);
+		o_stream_send(output, packed, p-packed);
+		for (j = 0; j < uid_rec_count; j++)
+			o_stream_send(output, &crc32[i+j], sizeof(crc32[i+j]));
+		for (j = 0; j < uid_rec_count; j++) {
+			i_assert(j < 2 || recs[i+j].ref_index == j+1);
+			o_stream_send(output, &recs[i+j].str_idx,
+				      sizeof(recs[i+j].str_idx));
+		}
+		i += uid_rec_count;
+	}
+
+	/* we know the block size now - write it */
+	block_size = output->offset - (block_offset + sizeof(block_size));
+	block_size = mail_index_uint32_to_offset(block_size << 2);
+	i_assert(block_size != 0);
+
+	end_offset = output->offset;
+	o_stream_seek(output, block_offset);
+	o_stream_send(output, &block_size, sizeof(block_size));
+	o_stream_seek(output, end_offset);
+
+	if (output->last_failed_errno != 0)
+		return -1;
+	else {
+		i_assert(view->last_added_uid == recs[count-1].uid);
+		view->last_read_uid = recs[count-1].uid;
+		view->last_read_block_offset = output->offset;
+		return 0;
+	}
+}
+
+static void
+mail_index_strmap_recreate_write(struct mail_index_strmap_view *view,
+				 struct ostream *output)
+{
+	const struct mail_index_header *idx_hdr;
+	struct mail_index_strmap_header hdr;
+
+	idx_hdr = mail_index_get_header(view->view);
+
+	/* write header */
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.version = MAIL_INDEX_STRMAP_VERSION;
+	hdr.uid_validity = idx_hdr->uid_validity;
+	o_stream_send(output, &hdr, sizeof(hdr));
+
+	view->total_ref_count = 0;
+	(void)mail_index_strmap_write_block(view, output, 0, 1);
+}
+
+static int mail_index_strmap_recreate(struct mail_index_strmap_view *view)
+{
+	struct mail_index_strmap *strmap = view->strmap;
+	string_t *str;
+	struct ostream *output;
+	const char *temp_path;
+	int fd, ret = 0;
+
+	if (array_count(&view->recs) == 0) {
+		/* everything expunged - just unlink the existing index */
+		if (unlink(strmap->path) < 0)
+			mail_index_strmap_set_syscall_error(strmap, "unlink()");
+		return 0;
+	}
+
+	str = t_str_new(256);
+	str_append(str, strmap->path);
+	fd = safe_mkstemp_hostpid(str, view->view->index->mode,
+				  (uid_t)-1, view->view->index->gid);
+	temp_path = str_c(str);
+
+	if (fd == -1) {
+		mail_index_set_error(strmap->index,
+				     "safe_mkstemp_hostpid(%s) failed: %m",
+				     temp_path);
+		return -1;
+	}
+	output = o_stream_create_fd(fd, 0, FALSE);
+	o_stream_cork(output);
+	mail_index_strmap_recreate_write(view, output);
+	if (output->last_failed_errno != 0) {
+		errno = output->last_failed_errno;
+		mail_index_set_error(strmap->index,
+				     "write(%s) failed: %m", temp_path);
+		ret = -1;
+	}
+	o_stream_destroy(&output);
+	if (close(fd) < 0) {
+		mail_index_set_error(strmap->index,
+				     "close(%s) failed: %m", temp_path);
+		ret = -1;
+	} else if (ret == 0 && rename(temp_path, strmap->path) < 0) {
+		mail_index_set_error(strmap->index,
+				     "rename(%s, %s) failed: %m",
+				     temp_path, strmap->path);
+		ret = -1;
+	}
+	if (ret < 0)
+		(void)unlink(temp_path);
+	return ret;
+}
+
+static int mail_index_strmap_lock(struct mail_index_strmap *strmap)
+{
+	int ret;
+
+	i_assert(strmap->fd != -1);
+
+	if (strmap->index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
+		i_assert(strmap->file_lock == NULL);
+
+		ret = file_wait_lock(strmap->fd, strmap->path, F_WRLCK,
+				     strmap->index->lock_method,
+				     MAIL_INDEX_STRMAP_TIMEOUT_SECS,
+				     &strmap->file_lock);
+		if (ret <= 0) {
+			mail_index_strmap_set_syscall_error(strmap,
+							    "file_wait_lock()");
+		}
+	} else {
+		i_assert(strmap->dotlock == NULL);
+
+		ret = file_dotlock_create(&strmap->dotlock_settings,
+					  strmap->path, 0, &strmap->dotlock);
+		if (ret <= 0) {
+			mail_index_strmap_set_syscall_error(strmap,
+				"file_dotlock_create()");
+		}
+	}
+	return ret;
+}
+
+static void mail_index_strmap_unlock(struct mail_index_strmap *strmap)
+{
+	if (strmap->file_lock != NULL)
+		file_unlock(&strmap->file_lock);
+	else if (strmap->dotlock != NULL)
+		file_dotlock_delete(&strmap->dotlock);
+}
+
+static int strmap_rec_cmp(const void *key, const void *value)
+{
+	const uint32_t *uid = key;
+	const struct mail_index_strmap_rec *rec = value;
+
+	return *uid < rec->uid ? -1 :
+		(*uid > rec->uid ? 1 : 0);
+}
+
+static int
+mail_index_strmap_write_append(struct mail_index_strmap_view *view)
+{
+	struct mail_index_strmap_read_context ctx;
+	const struct mail_index_strmap_rec *old_recs;
+	unsigned int i, old_count;
+	struct ostream *output;
+	uint32_t crc32, next_uid;
+	bool full_block;
+	int ret;
+
+	/* Check first if another process had written new records to the file.
+	   If there are any, hopefully they're the same as what we would be
+	   writing. There are two problematic cases when messages have been
+	   expunged recently:
+
+	   1) The file contains UIDs that we don't have. This means the string
+	   indexes won't be compatible anymore, so we'll have to renumber ours
+	   to match the ones in the strmap file.
+
+	   Currently we don't bother handling 1) case. If indexes don't match
+	   what we have, we just don't write anything.
+
+	   2) We have UIDs that don't exist in the file. We can't simply skip
+	   those records, because other records may have pointers to them using
+	   different string indexes than we have. Even if we renumbered those,
+	   future appends by other processes might cause the same problem (they
+	   see the string for the first time and assign it a new index, but we
+	   already have internally given it another index). So the only
+	   sensible choice is to write nothing and hope that the message goes
+	   away soon. */
+	old_recs = array_get(&view->recs, &old_count);
+	next_uid = view->last_read_uid + 1;
+	(void)bsearch_insert_pos(&next_uid, old_recs, old_count,
+				 sizeof(*old_recs), strmap_rec_cmp, &i);
+	if (i < old_count) {
+		while (i > 0 && old_recs[i-1].uid == old_recs[i].uid)
+			i--;
+	}
+
+	i_stream_sync(view->strmap->input);
+	i_stream_seek(view->strmap->input, view->last_read_block_offset);
+	full_block = TRUE;
+	while (i < old_count &&
+	       (ret = strmap_read_block_init(view, &ctx)) > 0) {
+		while ((ret = strmap_read_block_next(&ctx, &crc32)) > 0) {
+			if (ctx.rec.uid != old_recs[i].uid ||
+			    ctx.rec.str_idx != old_recs[i].str_idx) {
+				/* mismatch */
+				if (ctx.rec.uid > old_recs[i].uid) {
+					/* 1) case */
+					ctx.lost_expunged_uid = ctx.rec.uid;
+				} else if (ctx.rec.uid < old_recs[i].uid) {
+					/* 2) case */
+					ctx.lost_expunged_uid = old_recs[i].uid;
+				} else {
+					/* string index mismatch,
+					   shouldn't happen */
+				}
+				ret = -1;
+				break;
+			}
+			if (++i == old_count) {
+				full_block = FALSE;
+				break;
+			}
+		}
+		if (strmap_read_block_deinit(&ctx, ret, full_block) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+	if (ret < 0)
+		return -1;
+	if (i == old_count) {
+		/* nothing new to write */
+		return 0;
+	}
+	i_assert(full_block);
+	i_assert(old_recs[i].uid > view->last_read_uid);
+
+	/* write the new records */
+	output = o_stream_create_fd(view->strmap->fd, 0, FALSE);
+	o_stream_seek(output, view->last_read_block_offset);
+	o_stream_cork(output);
+	if (mail_index_strmap_write_block(view, output, i,
+					  view->last_read_uid + 1) < 0) {
+		errno = output->last_failed_errno;
+		mail_index_strmap_set_syscall_error(view->strmap, "write()");
+		ret = -1;
+	}
+	o_stream_destroy(&output);
+	return ret;
+}
+
+static int mail_index_strmap_write(struct mail_index_strmap_view *view)
+{
+	int ret;
+
+	/* FIXME: this renumbering doesn't work well when running for a long
+	   time since records aren't removed from hash often enough */
+	if (STRIDX_MUST_RENUMBER(view->next_str_idx - 1,
+				 hash2_count(view->hash))) {
+		mail_index_strmap_view_renumber(view);
+		if (!MAIL_INDEX_IS_IN_MEMORY(view->strmap->index)) {
+			if (mail_index_strmap_recreate(view) < 0) {
+				view->desynced = TRUE;
+				return -1;
+			}
+		}
+		return 0;
+	}
+
+	if (MAIL_INDEX_IS_IN_MEMORY(view->strmap->index) || view->desynced)
+		return 0;
+
+	if (view->strmap->fd == -1) {
+		/* initial file creation */
+		if (mail_index_strmap_recreate(view) < 0) {
+			view->desynced = TRUE;
+			return -1;
+		}
+		return 0;
+	}
+
+	/* append the new records to the strmap file */
+	if (mail_index_strmap_lock(view->strmap) <= 0) {
+		/* timeout / error */
+		ret = -1;
+	} else if (mail_index_strmap_need_reopen(view->strmap)) {
+		/* the file was already recreated - leave the syncing as it is
+		   for now and let the next sync re-read the file. */
+		ret = 0;
+	} else {
+		ret = mail_index_strmap_write_append(view);
+	}
+	mail_index_strmap_unlock(view->strmap);
+	if (ret < 0)
+		view->desynced = TRUE;
+	return ret;
+}
+
+void mail_index_strmap_view_sync_commit(struct mail_index_strmap_view_sync **_sync)
+{
+	struct mail_index_strmap_view_sync *sync = *_sync;
+	struct mail_index_strmap_view *view = sync->view;
+
+	*_sync = NULL;
+	i_free(sync);
+
+	(void)mail_index_strmap_write(view);
+
+	/* zero-terminate the records array */
+	(void)array_append_space(&view->recs);
+	array_delete(&view->recs, array_count(&view->recs)-1, 1);
+}
+
+void mail_index_strmap_view_sync_rollback(struct mail_index_strmap_view_sync **_sync)
+{
+	struct mail_index_strmap_view_sync *sync = *_sync;
+
+	*_sync = NULL;
+
+	mail_index_strmap_view_reset(sync->view);
+	i_free(sync);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-index/mail-index-strmap.h	Mon Sep 01 15:17:00 2008 +0300
@@ -0,0 +1,79 @@
+#ifndef MAIL_INDEX_STRMAP_H
+#define MAIL_INDEX_STRMAP_H
+
+#include "hash2.h"
+
+struct mail_index;
+struct mail_index_view;
+
+struct mail_index_strmap_header {
+#define MAIL_INDEX_STRMAP_VERSION 1
+	uint8_t version;
+	uint8_t unused[3];
+
+	uint32_t uid_validity;
+};
+
+struct mail_index_strmap_rec {
+	uint32_t uid;
+	uint32_t ref_index;
+	/* unique index number for the string */
+	uint32_t str_idx;
+};
+ARRAY_DEFINE_TYPE(mail_index_strmap_rec, struct mail_index_strmap_rec);
+
+typedef bool
+mail_index_strmap_key_cmp_t(const char *key,
+			    const struct mail_index_strmap_rec *rec,
+			    void *context);
+/* Returns 1 if matches, 0 if not, -1 if one of the records is expunged and
+   the result can't be determined */
+typedef int
+mail_index_strmap_rec_cmp_t(const struct mail_index_strmap_rec *rec1,
+			    const struct mail_index_strmap_rec *rec2,
+			    void *context);
+/* called when string indexes are renumbered. idx_map[old_idx] = new_idx.
+   if new_idx is 0, the record was expunged. As a special case if count=0,
+   the strmap was reset. */
+typedef void mail_index_strmap_remap_t(const uint32_t *idx_map,
+				       unsigned int old_count,
+				       unsigned int new_count, void *context);
+
+struct mail_index_strmap *
+mail_index_strmap_init(struct mail_index *index, const char *suffix);
+void mail_index_strmap_deinit(struct mail_index_strmap **strmap);
+
+/* Returns strmap records and hash that can be used for read-only access.
+   The records array always teminates with a record containing zeros (but it's
+   not counted in the array count). */
+struct mail_index_strmap_view *
+mail_index_strmap_view_open(struct mail_index_strmap *strmap,
+			    struct mail_index_view *idx_view,
+			    mail_index_strmap_key_cmp_t *key_compare_cb,
+			    mail_index_strmap_rec_cmp_t *rec_compare_cb,
+			    mail_index_strmap_remap_t *remap_cb,
+			    void *context,
+			    const ARRAY_TYPE(mail_index_strmap_rec) **recs_r,
+			    const struct hash2_table **hash_r);
+void mail_index_strmap_view_close(struct mail_index_strmap_view **view);
+
+/* Return the highest used string index. */
+uint32_t mail_index_strmap_view_get_highest_idx(struct mail_index_strmap_view *view);
+
+/* Synchronize strmap: Caller adds missing entries, expunged messages may be
+   removed internally and the changes are written to disk. Note that the strmap
+   recs/hash shouldn't be used until _sync_commit() is called, because the
+   string indexes may be renumbered if another process had already written the
+   same changes as us. */
+struct mail_index_strmap_view_sync *
+mail_index_strmap_view_sync_init(struct mail_index_strmap_view *view,
+				 uint32_t *last_uid_r);
+void mail_index_strmap_view_sync_add(struct mail_index_strmap_view_sync *sync,
+				     uint32_t uid, uint32_t ref_index,
+				     const char *key);
+void mail_index_strmap_view_sync_add_unique(struct mail_index_strmap_view_sync *sync,
+					    uint32_t uid, uint32_t ref_index);
+void mail_index_strmap_view_sync_commit(struct mail_index_strmap_view_sync **sync);
+void mail_index_strmap_view_sync_rollback(struct mail_index_strmap_view_sync **sync);
+
+#endif
--- a/src/lib-index/mail-index.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-index/mail-index.c	Mon Sep 01 15:17:00 2008 +0300
@@ -690,3 +690,50 @@
 		 ((offset & 0x7f000000) >> 24)) << 2;
 }
 #endif
+
+void mail_index_pack_num(uint8_t **p, uint32_t num)
+{
+	/* number continues as long as the highest bit is set */
+	while (num >= 0x80) {
+		**p = (num & 0x7f) | 0x80;
+		*p += 1;
+		num >>= 7;
+	}
+
+	**p = num;
+	*p += 1;
+}
+
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+			  uint32_t *num_r)
+{
+	const uint8_t *c = *p;
+	uint32_t value = 0;
+	unsigned int bits = 0;
+
+	for (;;) {
+		if (unlikely(c == end)) {
+			/* we should never see EOF */
+			*num_r = 0;
+			return -1;
+		}
+
+		value |= (*c & 0x7f) << bits;
+		if (*c < 0x80)
+			break;
+
+		bits += 7;
+		c++;
+	}
+
+	if (unlikely(bits >= 32)) {
+		/* broken input */
+		*p = end;
+		*num_r = 0;
+		return -1;
+	}
+
+	*p = c + 1;
+	*num_r = value;
+	return 0;
+}
--- a/src/lib-storage/index/Makefile.am	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/Makefile.am	Mon Sep 01 15:17:00 2008 +0300
@@ -26,7 +26,6 @@
 	index-thread.c \
 	index-thread-finish.c \
 	index-thread-links.c \
-	index-thread-list.c \
 	index-transaction.c
 
 headers = \
--- a/src/lib-storage/index/index-search.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/index-search.c	Mon Sep 01 15:17:00 2008 +0300
@@ -986,8 +986,7 @@
 
 	mail_search_args_reset(ctx->mail_ctx.args->args, TRUE);
 	if (args->have_inthreads) {
-		if (mail_thread_init(_t->box, FALSE, NULL,
-				     &ctx->thread_ctx) < 0)
+		if (mail_thread_init(_t->box, NULL, &ctx->thread_ctx) < 0)
 			ctx->failed = TRUE;
 		if (search_build_inthreads(ctx, args->args) < 0)
 			ctx->failed = TRUE;
--- a/src/lib-storage/index/index-thread-finish.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/index-thread-finish.c	Mon Sep 01 15:17:00 2008 +0300
@@ -2,6 +2,7 @@
 
 #include "lib.h"
 #include "array.h"
+#include "hash.h"
 #include "imap-base-subject.h"
 #include "mail-storage-private.h"
 #include "index-thread-private.h"
@@ -36,8 +37,7 @@
 	unsigned int refcount;
 
 	struct mail *tmp_mail;
-	struct mail_hash_transaction *hash_trans;
-	struct mail_thread_list_context *hash_list_ctx;
+	struct mail_thread_cache *cache;
 
 	ARRAY_DEFINE(roots, struct mail_thread_root_node);
 	ARRAY_DEFINE(shadow_nodes, struct mail_thread_shadow_node);
@@ -133,13 +133,13 @@
 {
 	const struct mail_thread_node *node;
 
-	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-	i_assert(MAIL_INDEX_NODE_EXISTS(node));
-	i_assert(node->uid_or_id != 0);
-	return node->uid_or_id;
+	node = array_idx(&ctx->cache->thread_nodes, idx);
+	i_assert(MAIL_THREAD_NODE_EXISTS(node));
+	i_assert(node->uid != 0);
+	return node->uid;
 }
 
-static int
+static void
 thread_child_node_fill(struct thread_finish_context *ctx,
 		       struct mail_thread_child_node *child)
 {
@@ -150,9 +150,7 @@
 	if (!mail_set_uid(ctx->tmp_mail, child->uid)) {
 		/* the UID should have existed. we would have rebuild
 		   the thread tree otherwise. */
-		mail_hash_transaction_set_corrupted(ctx->hash_trans,
-			t_strdup_printf("Found expunged UID %u", child->uid));
-		return -1;
+		i_unreached();
 	}
 
 	/* get sent date if we want to use it and if it's valid */
@@ -165,10 +163,9 @@
 		/* fallback to received date */
 		(void)mail_get_received_date(ctx->tmp_mail, &child->sort_date);
 	}
-	return 0;
 }
 
-static int
+static void
 thread_sort_children(struct thread_finish_context *ctx, uint32_t parent_idx,
 		     ARRAY_TYPE(mail_thread_child_node) *sorted_children)
 {
@@ -188,11 +185,10 @@
 		child.uid = thread_lookup_existing(ctx, child.idx);
 
 		array_append(sorted_children, &child, 1);
-		return 0;
+		return;
 	}
 	while (child.idx != 0) {
-		if (thread_child_node_fill(ctx, &child) < 0)
-			return -1;
+		thread_child_node_fill(ctx, &child);
 
 		array_append(sorted_children, &child, 1);
 		child.idx = shadows[child.idx].next_sibling_idx;
@@ -201,10 +197,9 @@
 	/* sort the children */
 	children = array_get_modifiable(sorted_children, &count);
 	qsort(children, count, sizeof(*children), mail_thread_child_node_cmp);
-	return 0;
 }
 
-static int gather_base_subjects(struct thread_finish_context *ctx)
+static void gather_base_subjects(struct thread_finish_context *ctx)
 {
 	struct subject_gather_context gather_ctx;
 	struct mail_thread_root_node *roots;
@@ -213,12 +208,13 @@
 	ARRAY_TYPE(mail_thread_child_node) sorted_children;
 	const struct mail_thread_child_node *children;
 	uint32_t idx, uid;
-	int ret = 0;
 
 	memset(&gather_ctx, 0, sizeof(gather_ctx));
 	gather_ctx.ctx = ctx;
 
 	roots = array_get_modifiable(&ctx->roots, &count);
+	if (count == 0)
+		return;
 	gather_ctx.subject_pool =
 		pool_alloconly_create(MEMPOOL_GROWING"base subjects",
 				      nearest_power(count * 20));
@@ -233,11 +229,8 @@
 			idx = roots[i].node.idx;
 		else if (!roots[i].ignore) {
 			/* find the oldest child */
-			if (thread_sort_children(ctx, roots[i].node.idx,
-						 &sorted_children) < 0) {
-				ret = -1;
-				break;
-			}
+			thread_sort_children(ctx, roots[i].node.idx,
+					     &sorted_children);
 			children = array_idx(&sorted_children, 0);
 			idx = children[0].idx;
 		} else {
@@ -249,10 +242,7 @@
 		if (!mail_set_uid(ctx->tmp_mail, uid)) {
 			/* the UID should have existed. we would have rebuild
 			   the thread tree otherwise. */
-			mail_hash_transaction_set_corrupted(ctx->hash_trans,
-				"Found expunged UID");
-			ret = -1;
-			break;
+			i_unreached();
 		}
 		if (mail_get_first_header(ctx->tmp_mail, HDR_SUBJECT,
 					  &subject) > 0) T_BEGIN {
@@ -263,8 +253,6 @@
 	array_free(&sorted_children);
 	hash_destroy(&gather_ctx.subject_hash);
 	pool_unref(&gather_ctx.subject_pool);
-
-	return ret;
 }
 
 static void thread_add_shadow_child(struct thread_finish_context *ctx,
@@ -374,19 +362,18 @@
 	return changed;
 }
 
-static int sort_root_nodes(struct thread_finish_context *ctx)
+static void sort_root_nodes(struct thread_finish_context *ctx)
 {
 	ARRAY_TYPE(mail_thread_child_node) sorted_children;
 	const struct mail_thread_child_node *children;
 	const struct mail_thread_shadow_node *shadows;
 	struct mail_thread_root_node *roots;
 	unsigned int i, count, child_count;
-	int ret = 0;
 
 	i_array_init(&sorted_children, 64);
 	shadows = array_idx(&ctx->shadow_nodes, 0);
 	roots = array_get_modifiable(&ctx->roots, &count);
-	for (i = 0; i < count && ret == 0; i++) {
+	for (i = 0; i < count; i++) {
 		if (roots[i].ignore)
 			continue;
 		if (roots[i].dummy) {
@@ -396,33 +383,25 @@
 				roots[i].ignore = TRUE;
 				continue;
 			}
-			if (thread_sort_children(ctx, roots[i].node.idx,
-						 &sorted_children) < 0) {
-				ret = -1;
-				break;
-			}
+			thread_sort_children(ctx, roots[i].node.idx,
+					     &sorted_children);
 			children = array_get(&sorted_children, &child_count);
 			if (child_count == 1) {
 				/* only one child - deferred step (3).
 				   promote the child to the root. */
 				roots[i].node = children[0];
-				ret = thread_child_node_fill(ctx,
-							     &roots[i].node);
+				thread_child_node_fill(ctx, &roots[i].node);
 				roots[i].dummy = FALSE;
 			} else {
 				roots[i].node.uid = children[0].uid;
 				roots[i].node.sort_date = children[0].sort_date;
 			}
 		} else {
-			ret = thread_child_node_fill(ctx, &roots[i].node);
+			thread_child_node_fill(ctx, &roots[i].node);
 		}
 	}
 	array_free(&sorted_children);
-	if (ret < 0)
-		return -1;
-
 	qsort(roots, count, sizeof(*roots), mail_thread_child_node_cmp);
-	return 0;
 }
 
 static int mail_thread_root_node_idx_cmp(const void *key, const void *value)
@@ -434,10 +413,10 @@
 		*idx > root->node.idx ? 1 : 0;
 }
 
-static int sort_root_nodes_ref2(struct thread_finish_context *ctx,
-				uint32_t record_count)
+static void sort_root_nodes_ref2(struct thread_finish_context *ctx,
+				 uint32_t record_count)
 {
-	struct mail_thread_node *node;
+	const struct mail_thread_node *node;
 	struct mail_thread_root_node *roots, *root;
 	struct mail_thread_child_node child;
 	unsigned int root_count;
@@ -445,20 +424,18 @@
 
 	roots = array_get_modifiable(&ctx->roots, &root_count);
 	for (idx = 1; idx < record_count; idx++) {
-		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec) ||
-		    !MAIL_INDEX_NODE_EXISTS(node))
+		node = array_idx(&ctx->cache->thread_nodes, idx);
+		if (!MAIL_THREAD_NODE_EXISTS(node))
 			continue;
 
 		child.idx = idx;
-		if (thread_child_node_fill(ctx, &child) < 0)
-			return -1;
+		thread_child_node_fill(ctx, &child);
 
 		parent_idx = idx;
 		while (node->parent_idx != 0) {
 			parent_idx = node->parent_idx;
-			node = mail_hash_lookup_idx(ctx->hash_trans,
-						    node->parent_idx);
+			node = array_idx(&ctx->cache->thread_nodes,
+					 node->parent_idx);
 		}
 		root = bsearch(&parent_idx, roots, root_count, sizeof(*roots),
 			       mail_thread_root_node_idx_cmp);
@@ -468,66 +445,45 @@
 			root->node.sort_date = child.sort_date;
 	}
 	qsort(roots, root_count, sizeof(*roots), mail_thread_child_node_cmp);
-	return 0;
 }
 
-static int mail_thread_create_shadows(struct thread_finish_context *ctx,
-				      uint32_t record_count)
+static void mail_thread_create_shadows(struct thread_finish_context *ctx,
+				       uint32_t record_count)
 {
-	struct mail_thread_list_update_context *thread_list_ctx;
-	struct mail_thread_node *node, *parent;
+	const struct mail_thread_node *node, *parent;
 	struct mail_thread_root_node root;
 	struct mail_thread_child_node child;
-	struct mail_hash *hash;
-	uint8_t *referenced;
-	uint32_t idx, i, j, parent_idx, alloc_size, max;
-	int ret = 0;
+	uint32_t idx, parent_idx;
 
 	ctx->use_sent_date = FALSE;
 
 	memset(&root, 0, sizeof(root));
 	memset(&child, 0, sizeof(child));
 
-	alloc_size = record_count/8 + 1;
-	referenced = i_new(uint8_t, alloc_size);
+	/* We may see dummy messages without parents or children. We can't
+	   free them since the nodes are in an array, but they may get reused
+	   later so just leave them be. With the current algorithm when this
+	   happens all the struct fields are always zero at that point, so
+	   we don't even have to try to zero them. */
 	for (idx = 1; idx < record_count; idx++) {
-		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-		if (MAIL_HASH_RECORD_IS_DELETED(&node->rec)) {
-			/* @UNSAFE: mark deleted records as referenced, so we
-			   don't waste time with them */
-			referenced[idx / 8] |= 1 << (idx % 8);
-			continue;
-		}
-
-		if (MAIL_INDEX_NODE_EXISTS(node)) {
-			/* @UNSAFE: don't remove existing nodes */
-			referenced[idx / 8] |= 1 << (idx % 8);
-		}
+		node = array_idx(&ctx->cache->thread_nodes, idx);
 
 		if (node->parent_idx == 0) {
 			/* root node - add to roots list */
 			root.node.idx = idx;
-			if (!MAIL_INDEX_NODE_EXISTS(node)) {
+			if (!MAIL_THREAD_NODE_EXISTS(node)) {
 				root.dummy = TRUE;
 				root.node.uid = 0;
 			} else {
 				root.dummy = FALSE;
-				root.node.uid = node->uid_or_id;
+				root.node.uid = node->uid;
 			}
 			array_append(&ctx->roots, &root, 1);
 			continue;
 		}
-		if (node->parent_idx >= record_count) {
-			mail_hash_transaction_set_corrupted(ctx->hash_trans,
-				"parent_idx too large");
-			ret = -1;
-			break;
-		}
+		i_assert(node->parent_idx < record_count);
 
-		/* @UNSAFE: keep track of referenced nodes */
-		referenced[node->parent_idx / 8] |= 1 << (node->parent_idx % 8);
-
-		if (!MAIL_INDEX_NODE_EXISTS(node)) {
+		if (!MAIL_THREAD_NODE_EXISTS(node)) {
 			/* dummy node */
 			continue;
 		}
@@ -536,96 +492,52 @@
 		   node as its child. If there are no non-dummy
 		   parents, add it as the highest dummy's child. */
 		parent_idx = node->parent_idx;
-		parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-		while (!MAIL_INDEX_NODE_EXISTS(parent) &&
+		parent = array_idx(&ctx->cache->thread_nodes, parent_idx);
+		while (!MAIL_THREAD_NODE_EXISTS(parent) &&
 		       parent->parent_idx != 0) {
 			parent_idx = parent->parent_idx;
-			parent = mail_hash_lookup_idx(ctx->hash_trans,
-						      parent_idx);
+			parent = array_idx(&ctx->cache->thread_nodes,
+					   parent_idx);
 		}
 		thread_add_shadow_child(ctx, parent_idx, idx);
 	}
-
-	/* remove unneeded records from hash */
-	hash = mail_hash_transaction_get_hash(ctx->hash_trans);
-	if (ret < 0 || mail_hash_get_lock_type(hash) != F_WRLCK) {
-		/* thread index isn't locked, can't delete anything */
-		i_free(referenced);
-		return ret;
-	}
-
-	/* FIXME: we could remove everything from thread list by pointing to
-	   existing messages' References: headers */
-	thread_list_ctx = mail_thread_list_update_begin(ctx->hash_list_ctx,
-							ctx->hash_trans);
-	for (i = 0; i < alloc_size; i++) {
-		if (referenced[i] == 0xff)
-			continue;
-
-		j = i == 0 ? 1 : 0;
-		max = i+1 < alloc_size ? 8 : (record_count % 8);
-		for (; j < max; j++) {
-			if ((referenced[i] & (1 << j)) != 0)
-				continue;
-
-			idx = i*8 + j;
-			node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-
-			if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) {
-				mail_thread_list_remove(thread_list_ctx,
-							node->uid_or_id);
-			}
-			mail_hash_remove(ctx->hash_trans, idx,
-					 node->msgid_crc32);
-		}
-	}
-	i_free(referenced);
-	return mail_thread_list_commit(&thread_list_ctx);
 }
 
-static int mail_thread_finish(struct thread_finish_context *ctx,
-			      enum mail_thread_type thread_type)
+static void mail_thread_finish(struct thread_finish_context *ctx,
+			       enum mail_thread_type thread_type)
 {
-	const struct mail_hash_header *hdr;
+	unsigned int record_count = array_count(&ctx->cache->thread_nodes);
 
-	hdr = mail_hash_get_header(ctx->hash_trans);
-	i_assert(hdr->record_count > 0);
-	ctx->next_new_root_idx = hdr->record_count + 1;
+	ctx->next_new_root_idx = record_count + 1;
 
 	/* (2) save root nodes and (3) remove dummy messages */
-	i_array_init(&ctx->roots, I_MIN(128, hdr->record_count));
-	i_array_init(&ctx->shadow_nodes, hdr->record_count);
+	i_array_init(&ctx->roots, I_MIN(128, record_count));
+	i_array_init(&ctx->shadow_nodes, record_count);
 	/* make sure all shadow indexes are accessible directly. */
-	(void)array_idx_modifiable(&ctx->shadow_nodes, hdr->record_count);
+	(void)array_idx_modifiable(&ctx->shadow_nodes, record_count);
 
-	if (mail_thread_create_shadows(ctx, hdr->record_count) < 0)
-		return -1;
+	mail_thread_create_shadows(ctx, record_count);
 
 	/* (4) */
 	ctx->use_sent_date = TRUE;
 	switch (thread_type) {
 	case MAIL_THREAD_REFERENCES:
-		if (sort_root_nodes(ctx) < 0)
-			return -1;
+		sort_root_nodes(ctx);
 		/* (5) Gather together messages under the root that have
 		   the same base subject text. */
-		if (gather_base_subjects(ctx) < 0)
-			return -1;
+		gather_base_subjects(ctx);
 		/* (5.C) Merge threads with the same thread subject. */
 		if (merge_subject_threads(ctx)) {
 			/* root ordering may have changed, sort them again. */
-			if (sort_root_nodes(ctx) < 0)
-				return -1;
+			sort_root_nodes(ctx);
 		}
 		break;
 	case MAIL_THREAD_REFERENCES2:
-		if (sort_root_nodes_ref2(ctx, hdr->record_count) < 0)
-			return -1;
+		sort_root_nodes_ref2(ctx, record_count);
 		break;
 	default:
 		i_unreached();
 	}
-	return 0;
 }
 
 static void
@@ -643,15 +555,10 @@
 			/* dummy root */
 			if (root)
 				continue;
+			i_unreached();
 		} else {
 			mailbox_get_seq_range(box, uid, uid, &seq, &seq);
-		}
-		if (uid == 0) {
-			mail_hash_transaction_set_corrupted(
-				iter->ctx->hash_trans,
-				t_strdup_printf("Found expunged UID %u", uid));
-			iter->failed = TRUE;
-			seq = 0;
+			i_assert(seq != 0);
 		}
 		children[i].uid = seq;
 	}
@@ -685,45 +592,33 @@
 	child_iter->ctx->refcount++;
 
 	i_array_init(&child_iter->children, 8);
-	if (thread_sort_children(child_iter->ctx, parent_idx,
-				 &child_iter->children) < 0) {
-		child_iter->failed = TRUE;
-		array_clear(&child_iter->children);
-	} else {
-		if (child_iter->ctx->return_seqs)
-			nodes_change_uids_to_seqs(child_iter, FALSE);
-	}
+	thread_sort_children(child_iter->ctx, parent_idx,
+			     &child_iter->children);
+	if (child_iter->ctx->return_seqs)
+		nodes_change_uids_to_seqs(child_iter, FALSE);
 	return child_iter;
 }
 
 struct mail_thread_iterate_context *
-mail_thread_iterate_init_full(struct mail *tmp_mail,
-			      struct mail_hash_transaction *hash_trans,
-			      struct mail_thread_list_context *hash_list_ctx,
+mail_thread_iterate_init_full(struct mail_thread_cache *cache,
+			      struct mail *tmp_mail,
 			      enum mail_thread_type thread_type,
 			      bool return_seqs)
 {
 	struct mail_thread_iterate_context *iter;
 	struct thread_finish_context *ctx;
-	struct mail_hash *hash;
 
 	iter = i_new(struct mail_thread_iterate_context, 1);
 	ctx = iter->ctx = i_new(struct thread_finish_context, 1);
 	ctx->refcount = 1;
+	ctx->cache = cache;
 	ctx->tmp_mail = tmp_mail;
-	ctx->hash_trans = hash_trans;
-	ctx->hash_list_ctx = hash_list_ctx;
 	ctx->return_seqs = return_seqs;
-	if (mail_thread_finish(ctx, thread_type) < 0)
-		iter->failed = TRUE;
-	else {
-		hash = mail_hash_transaction_get_hash(hash_trans);
-		if (mail_hash_get_lock_type(hash) == F_WRLCK)
-			(void)mail_hash_transaction_write(hash_trans);
-		mail_thread_iterate_fill_root(iter);
-		if (return_seqs)
-			nodes_change_uids_to_seqs(iter, TRUE);
-	}
+	mail_thread_finish(ctx, thread_type);
+
+	mail_thread_iterate_fill_root(iter);
+	if (return_seqs)
+		nodes_change_uids_to_seqs(iter, TRUE);
 	return iter;
 }
 
@@ -756,14 +651,9 @@
 int mail_thread_iterate_deinit(struct mail_thread_iterate_context **_iter)
 {
 	struct mail_thread_iterate_context *iter = *_iter;
-	int ret = iter->failed ? -1 : 0;
 
 	*_iter = NULL;
 
-	if (ret < 0) {
-		struct mail *mail = iter->ctx->tmp_mail;
-		mail_storage_set_internal_error(mail->box->storage);
-	}
 	if (--iter->ctx->refcount == 0) {
 		array_free(&iter->ctx->roots);
 		array_free(&iter->ctx->shadow_nodes);
@@ -771,5 +661,5 @@
 	}
 	array_free(&iter->children);
 	i_free(iter);
-	return ret;
+	return 0;
 }
--- a/src/lib-storage/index/index-thread-links.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/index-thread-links.c	Mon Sep 01 15:17:00 2008 +0300
@@ -6,85 +6,30 @@
 #include "mail-storage.h"
 #include "index-thread-private.h"
 
-static struct mail_thread_node *
-thread_msgid_get(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-		 unsigned int ref_index, const char *msgid, uint32_t *idx_r)
+static uint32_t thread_msg_add(struct mail_thread_cache *cache,
+			       uint32_t uid, uint32_t msgid_idx)
 {
-	struct mail_thread_node *node, new_node;
-	struct msgid_search_key key;
-	const char **msgidp;
-	uint32_t idx;
+	struct mail_thread_node *node;
 
-	i_assert(ref_index != MAIL_INDEX_NODE_REF_MSGID);
-
-	key.msgid = msgid;
-	key.msgid_crc32 = crc32_str_nonzero(msgid);
+	i_assert(msgid_idx != 0);
+	i_assert(msgid_idx < cache->first_invalid_msgid_str_idx);
 
-	node = mail_hash_lookup(ctx->hash_trans, &key, &idx);
-	if (node == NULL) {
-		/* not found, create */
-		memset(&new_node, 0, sizeof(new_node));
-		new_node.msgid_crc32 = key.msgid_crc32;
-		if (ref_index <= MAIL_INDEX_NODE_REF_MAX_VALUE) {
-			new_node.uid_or_id = ref_uid;
-			new_node.ref_index = ref_index;
-		} else {
-			new_node.uid_or_id =
-				mail_thread_list_add(ctx->thread_list_ctx,
-						     msgid);
-			new_node.ref_index = MAIL_INDEX_NODE_REF_EXT;
-		}
+	node = array_idx_modifiable(&cache->thread_nodes, msgid_idx);
+	if (node->uid == 0)
+		node->uid = uid;
+	else {
+		/* duplicate message-id, keep the original.
+		   if the original ever gets expunged, rebuild. */
+		node->expunge_rebuilds = TRUE;
 
-		mail_hash_insert(ctx->hash_trans, &key, &new_node, &idx);
-		node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+		msgid_idx = cache->next_invalid_msgid_str_idx++;
+		node = array_idx_modifiable(&cache->thread_nodes, msgid_idx);
+		node->uid = uid;
 	}
-
-	/* keep message-ids cached */
-	msgidp = array_idx_modifiable(&ctx->msgid_cache, idx);
-	if (*msgidp == NULL)
-		*msgidp = p_strdup(ctx->msgid_pool, msgid);
-
-	*idx_r = idx;
-	return node;
+	return msgid_idx;
 }
 
-static void thread_msg_add(struct mail_thread_update_context *ctx, uint32_t uid,
-			   const char *msgid, uint32_t *idx_r)
-{
-	struct mail_thread_node *node;
-	struct mail_thread_node unode;
-
-	if (msgid != NULL) {
-		node = thread_msgid_get(ctx, 0, 0, msgid, idx_r);
-		if (!MAIL_INDEX_NODE_EXISTS(node)) {
-			/* add UID to node */
-			if (node->ref_index == MAIL_INDEX_NODE_REF_EXT &&
-			    node->uid_or_id != 0) {
-				mail_thread_list_remove(ctx->thread_list_ctx,
-							node->uid_or_id);
-			}
-
-			node->uid_or_id = uid;
-			node->ref_index = MAIL_INDEX_NODE_REF_MSGID;
-		} else {
-			/* duplicate, keep the original. if the original ever
-			   gets expunged, rebuild. */
-			node->expunge_rebuilds = TRUE;
-			msgid = NULL;
-		}
-		mail_hash_update(ctx->hash_trans, *idx_r);
-	}
-
-	if (msgid == NULL) {
-		/* no valid message-id */
-		memset(&unode, 0, sizeof(unode));
-		unode.uid_or_id = uid;
-		unode.ref_index = MAIL_INDEX_NODE_REF_MSGID;
-		mail_hash_insert(ctx->hash_trans, NULL, &unode, idx_r);
-	}
-}
-
-static bool thread_node_has_ancestor(struct mail_thread_update_context *ctx,
+static bool thread_node_has_ancestor(struct mail_thread_cache *cache,
 				     const struct mail_thread_node *node,
 				     const struct mail_thread_node *ancestor)
 {
@@ -92,24 +37,24 @@
 		if (node->parent_idx == 0)
 			return FALSE;
 
-		node = mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
+		node = array_idx(&cache->thread_nodes, node->parent_idx);
 	}
 	return TRUE;
 }
 
-static void thread_link_reference(struct mail_thread_update_context *ctx,
+static void thread_link_reference(struct mail_thread_cache *cache,
 				  uint32_t parent_idx, uint32_t child_idx)
 {
 	struct mail_thread_node *node, *parent, *child;
 	uint32_t idx;
 
-	parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-	child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
+	i_assert(parent_idx < cache->first_invalid_msgid_str_idx);
+
+	parent = array_idx_modifiable(&cache->thread_nodes, parent_idx);
+	child = array_idx_modifiable(&cache->thread_nodes, child_idx);
 
 	child->parent_link_refcount++;
-	mail_hash_update(ctx->hash_trans, child_idx);
-
-	if (thread_node_has_ancestor(ctx, parent, child)) {
+	if (thread_node_has_ancestor(cache, parent, child)) {
 		if (parent == child) {
 			/* loops to itself - ignore */
 			return;
@@ -130,14 +75,9 @@
 		node = parent;
 		do {
 			idx = node->parent_idx;
-			if (idx == 0) {
-				/* earlier lookup_idx() failed */
-				ctx->failed = TRUE;
-				break;
-			}
-			node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-			node->parent_unref_rebuilds = TRUE;
-			mail_hash_update(ctx->hash_trans, idx);
+			i_assert(idx != 0);
+			node = array_idx_modifiable(&cache->thread_nodes, idx);
+			node->child_unref_rebuilds = TRUE;
 		} while (node != child);
 		return;
 	} else if (child->parent_idx == parent_idx) {
@@ -146,11 +86,11 @@
 	}
 
 	/* Set parent_node as child_node's parent */
-	if (child->parent_idx == 0)
+	if (child->parent_idx == 0) {
 		child->parent_idx = parent_idx;
-	else {
+	} else {
 		/* Conflicting parent already exists, keep the original */
-		if (MAIL_INDEX_NODE_EXISTS(child)) {
+		if (MAIL_THREAD_NODE_EXISTS(child)) {
 			/* If this message gets expunged,
 			   the parent is changed. */
 			child->expunge_rebuilds = TRUE;
@@ -166,304 +106,126 @@
 
 			   b) is probably the least likely to happen,
 			   so use it */
-			child->parent_unref_rebuilds = TRUE;
-		}
-	}
-	mail_hash_update(ctx->hash_trans, child_idx);
-}
-
-struct thread_message_id {
-	const char *str;
-	uint32_t crc32;
-	unsigned int collisions_after;
-};
-
-static const char *
-thread_link_references(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-		       const char *references)
-{
-	ARRAY_DEFINE(id_arr, struct thread_message_id);
-	struct thread_message_id *ids;
-	struct thread_message_id id;
-	uint32_t parent_idx, child_idx;
-	unsigned int i, j, count, ref_index;
-
-	/* put all message IDs to an array */
-	memset(&id, 0, sizeof(id));
-	t_array_init(&id_arr, 32);
-	while ((id.str = message_id_get_next(&references)) != NULL) {
-		id.crc32 = crc32_str_nonzero(id.str);
-		array_append(&id_arr, &id, 1);
-	}
-
-	ids = array_get_modifiable(&id_arr, &count);
-	if (count <= 1)
-		return count == 0 ? NULL : ids[0].str;
-
-	/* count collisions */
-	for (i = 0; i < count; i++) {
-		for (j = i + 1; j < count; j++) {
-			if (ids[i].crc32 == ids[j].crc32)
-				ids[i].collisions_after++;
+			child->child_unref_rebuilds = TRUE;
 		}
 	}
-
-	ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST +
-		ids[0].collisions_after;
-	thread_msgid_get(ctx, ref_uid, ref_index, ids[0].str, &parent_idx);
-	for (i = 1; i < count; i++) {
-		ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST +
-			ids[i].collisions_after;
-		thread_msgid_get(ctx, ref_uid, ref_index,
-				 ids[i].str, &child_idx);
-		thread_link_reference(ctx, parent_idx, child_idx);
-		parent_idx = child_idx;
-	}
-
-	/* link the last ID to us */
-	return ids[count-1].str;
-}
-
-static int thread_get_mail_header(struct mail *mail, const char *name,
-				  const char **value_r)
-{
-	if (mail_get_first_header(mail, name, value_r) < 0) {
-		if (!mail->expunged)
-			return -1;
-
-		/* Message is expunged. Instead of failing the entire THREAD
-		   command, just treat the header as non-existing. */
-		*value_r = NULL;
-	}
-	return 0;
 }
 
-int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail)
+static uint32_t
+thread_link_references(struct mail_thread_cache *cache, uint32_t uid,
+		       const struct mail_index_strmap_rec *msgid_map,
+		       unsigned int *msgid_map_idx)
 {
-	const char *message_id, *in_reply_to, *references, *parent_msgid;
-	const struct mail_thread_node *parent, *old_parent;
-	struct mail_hash_header *hdr;
+	uint32_t parent_idx;
+
+	if (msgid_map->uid != uid)
+		return 0;
+
+	parent_idx = msgid_map->str_idx;
+	msgid_map++;
+	*msgid_map_idx += 1;
+
+	for (; msgid_map->uid == uid; msgid_map++) {
+		thread_link_reference(cache, parent_idx, msgid_map->str_idx);
+		parent_idx = msgid_map->str_idx;
+		*msgid_map_idx += 1;
+	}
+	i_assert(parent_idx < cache->first_invalid_msgid_str_idx);
+	return parent_idx;
+}
+
+void mail_thread_add(struct mail_thread_cache *cache,
+		     const struct mail_index_strmap_rec *msgid_map,
+		     unsigned int *msgid_map_idx)
+{
 	struct mail_thread_node *node;
 	uint32_t idx, parent_idx;
-	unsigned int ref_index;
-
-	hdr = mail_hash_get_header(ctx->hash_trans);
-	i_assert(mail->uid > hdr->last_uid);
-	hdr->last_uid = mail->uid;
-	hdr->message_count++;
 
-	if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
-	    thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
-		return -1;
+	i_assert(msgid_map->ref_index == MAIL_THREAD_NODE_REF_MSGID);
+	i_assert(cache->last_uid <= msgid_map->uid);
 
-	thread_msg_add(ctx, mail->uid, message_id_get_next(&message_id), &idx);
-	parent_msgid = thread_link_references(ctx, mail->uid, references);
+	cache->last_uid = msgid_map->uid;
 
-	if (parent_msgid != NULL)
-		ref_index = MAIL_INDEX_NODE_REF_REFERENCES_LAST;
-	else {
-		/* no valid IDs in References:, use In-Reply-To: instead */
-		if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
-					   &in_reply_to) < 0)
-			return -1;
-		parent_msgid = message_id_get_next(&in_reply_to);
-		ref_index = MAIL_INDEX_NODE_REF_INREPLYTO;
-	}
-	parent = parent_msgid == NULL ? NULL :
-		thread_msgid_get(ctx, mail->uid, ref_index,
-				 parent_msgid, &parent_idx);
+	idx = thread_msg_add(cache, msgid_map->uid, msgid_map->str_idx);
+	parent_idx = thread_link_references(cache, msgid_map->uid,
+					    msgid_map + 1, msgid_map_idx);
 
-	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
-	old_parent = node->parent_idx == 0 ? NULL :
-		mail_hash_lookup_idx(ctx->hash_trans, node->parent_idx);
-
-	if (old_parent != NULL &&
-	    (parent == NULL || old_parent->parent_idx != parent_idx)) {
+	node = array_idx_modifiable(&cache->thread_nodes, idx);
+	if (node->parent_idx != parent_idx && node->parent_idx != 0) {
 		/* conflicting parent, remove it. */
 		node->parent_idx = 0;
 		/* If this message gets expunged, we have to revert back to
 		   the original parent. */
 		node->expunge_rebuilds = TRUE;
-		mail_hash_update(ctx->hash_trans, idx);
 	}
-	if (parent != NULL)
-		thread_link_reference(ctx, parent_idx, idx);
-	return 0;
+	if (parent_idx != 0)
+		thread_link_reference(cache, parent_idx, idx);
+	*msgid_map_idx += 1;
 }
 
 static bool
-mail_thread_node_lookup(struct mail_thread_update_context *ctx, uint32_t uid,
-			uint32_t *idx_r, const char **msgid_r,
-			struct mail_thread_node **node_r)
-{
-	struct mail_thread_node *node;
-	struct msgid_search_key key;
-	const char *msgids;
-	int ret;
-
-	if (!mail_set_uid(ctx->tmp_mail, uid))
-		return FALSE;
-
-	ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID, &msgids);
-	if (ret <= 0)
-		return FALSE;
-
-	key.msgid = message_id_get_next(&msgids);
-	if (key.msgid == NULL)
-		return FALSE;
-	key.msgid_crc32 = crc32_str_nonzero(key.msgid);
-
-	node = mail_hash_lookup(ctx->hash_trans, &key, idx_r);
-	if (node == NULL)
-		return FALSE;
-
-	if (node->ref_index != MAIL_INDEX_NODE_REF_MSGID ||
-	    node->uid_or_id != uid) {
-		/* duplicate Message-ID probably */
-		return FALSE;
-	}
-	*msgid_r = key.msgid;
-	*node_r = node;
-	return TRUE;
-}
-
-static bool
-thread_msgid_lookup(struct mail_thread_update_context *ctx, const char *msgid,
-		    uint32_t *idx_r)
-{
-	struct msgid_search_key key;
-
-	key.msgid = msgid;
-	key.msgid_crc32 = crc32_str_nonzero(msgid);
-
-	ctx->cmp_match_count = 0;
-	ctx->cmp_last_idx = 0;
-
-	if (mail_hash_lookup(ctx->hash_trans, &key, idx_r) == NULL) {
-		if (ctx->cmp_match_count != 1 || ctx->failed) {
-			/* couldn't find the message-id */
-			return FALSE;
-		}
-
-		/* there's only one key with this crc32 value, so it
-		   must be what we're looking for */
-		*idx_r = ctx->cmp_last_idx;
-	}
-	return TRUE;
-}
-
-static bool
-thread_unref_msgid(struct mail_thread_update_context *ctx,
-		   uint32_t ref_uid, uint32_t parent_idx,
-		   const char *child_msgid, uint32_t child_idx)
+mail_thread_unref_link(struct mail_thread_cache *cache,
+		       uint32_t parent_idx, uint32_t child_idx)
 {
 	struct mail_thread_node *parent, *child;
 
-	parent = mail_hash_lookup_idx(ctx->hash_trans, parent_idx);
-	if (parent->parent_unref_rebuilds)
+	parent = array_idx_modifiable(&cache->thread_nodes, parent_idx);
+	if (parent->child_unref_rebuilds)
 		return FALSE;
 
-	child = mail_hash_lookup_idx(ctx->hash_trans, child_idx);
-	if (child->parent_link_refcount == 0) {
-		mail_hash_transaction_set_corrupted(ctx->hash_trans,
-						    "unexpected refcount=0");
-		return FALSE;
-	}
+	child = array_idx_modifiable(&cache->thread_nodes, child_idx);
+	i_assert(child->parent_link_refcount > 0);
+
 	child->parent_link_refcount--;
 	if (child->parent_link_refcount == 0) {
 		/* we don't have a root anymore */
 		child->parent_idx = 0;
 	}
-
-	if (child->uid_or_id == ref_uid &&
-	    child->ref_index != MAIL_INDEX_NODE_REF_EXT) {
-		child->uid_or_id =
-			mail_thread_list_add(ctx->thread_list_ctx, child_msgid);
-		child->ref_index = MAIL_INDEX_NODE_REF_EXT;
-	}
-	mail_hash_update(ctx->hash_trans, child_idx);
-
-	if (parent->uid_or_id == ref_uid &&
-	    parent->ref_index != MAIL_INDEX_NODE_REF_EXT) {
-		parent->uid_or_id =
-			mail_thread_list_add(ctx->thread_list_ctx, child_msgid);
-		parent->ref_index = MAIL_INDEX_NODE_REF_EXT;
-		mail_hash_update(ctx->hash_trans, parent_idx);
-	}
 	return TRUE;
 }
 
-static bool
-thread_unref_links(struct mail_thread_update_context *ctx, uint32_t ref_uid,
-		   const char *last_child_msgid, uint32_t last_child_idx,
-		   const char *references, bool *valid_r)
+bool mail_thread_remove(struct mail_thread_cache *cache,
+			const struct mail_index_strmap_rec *msgid_map,
+			unsigned int *msgid_map_idx)
 {
-	const char *msgid;
-	uint32_t parent_idx, child_idx;
-
-	/* tmp_mail may be changed below, so we have to duplicate the
-	   references string */
-	references = t_strdup(references);
-	*valid_r = FALSE;
-
-	msgid = message_id_get_next(&references);
-	if (msgid == NULL)
-		return TRUE;
-	if (!thread_msgid_lookup(ctx, msgid, &parent_idx))
-		return FALSE;
-	*valid_r = TRUE;
-
-	while ((msgid = message_id_get_next(&references)) != NULL) {
-		if (!thread_msgid_lookup(ctx, msgid, &child_idx) ||
-		    !thread_unref_msgid(ctx, ref_uid, parent_idx,
-					msgid, child_idx))
-			return FALSE;
-		parent_idx = child_idx;
-	}
-	return thread_unref_msgid(ctx, ref_uid, parent_idx,
-				  last_child_msgid, last_child_idx);
-}
+	struct mail_thread_node *node;
+	uint32_t idx, parent_idx;
+	unsigned int count = 1;
 
-int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t uid)
-{
-	struct mail_hash_header *hdr;
-	struct mail_thread_node *node;
-	const char *msgid, *references, *in_reply_to;
-	uint32_t idx, parent_idx;
-	bool have_refs;
-
-	if (!mail_thread_node_lookup(ctx, uid, &idx, &msgid, &node))
-		return 0;
-	if (node->expunge_rebuilds)
-		return 0;
+	idx = msgid_map->str_idx;
+	i_assert(idx != 0);
 
-	if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
-				  &references) < 0)
-		return -1;
-
-	if (!thread_unref_links(ctx, uid, msgid, idx, references, &have_refs))
-		return 0;
-	if (!have_refs) {
-		/* no valid IDs in References:, use In-Reply-To: instead */
-		if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
-					  &in_reply_to) < 0)
-			return -1;
-		in_reply_to = message_id_get_next(&in_reply_to);
-		if (in_reply_to != NULL &&
-		    (!thread_msgid_lookup(ctx, in_reply_to, &parent_idx) ||
-		     !thread_unref_msgid(ctx, uid, parent_idx,
-					 in_reply_to, idx)))
-			return 0;
+	if (msgid_map->uid > cache->last_uid) {
+		/* this message was never added to the cache, skip */
+		return TRUE;
 	}
 
-	/* get the node again, the pointer may have changed */
-	node = mail_hash_lookup_idx(ctx->hash_trans, idx);
+	node = array_idx_modifiable(&cache->thread_nodes, idx);
+	if (node->expunge_rebuilds) {
+		/* this catches the duplicate message-id case */
+		return FALSE;
+	}
+	i_assert(node->uid == msgid_map->uid);
 
-	node->uid_or_id = mail_thread_list_add(ctx->thread_list_ctx, msgid);
-	node->ref_index = MAIL_INDEX_NODE_REF_EXT;
-	mail_hash_update(ctx->hash_trans, idx);
+	/* update link refcounts */
+	if (msgid_map[count].uid == node->uid) {
+		parent_idx = msgid_map[count].str_idx;
+		count++;
+		while (msgid_map[count].uid == node->uid) {
+			if (!mail_thread_unref_link(cache, parent_idx,
+						    msgid_map[count].str_idx))
+				return FALSE;
+			parent_idx = msgid_map[count].str_idx;
+			count++;
+		}
+		if (!mail_thread_unref_link(cache, parent_idx, idx))
+			return FALSE;
+	}
+	/* mark this message as expunged */
+	node->uid = 0;
 
-	hdr = mail_hash_get_header(ctx->hash_trans);
-	hdr->message_count--;
-	return 1;
+	/* we don't know (and don't want to waste time figuring out) if other
+	   messages point to this removed message, so don't delete the node */
+	*msgid_map_idx += count;
+	return TRUE;
 }
--- a/src/lib-storage/index/index-thread-private.h	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/index-thread-private.h	Mon Sep 01 15:17:00 2008 +0300
@@ -2,50 +2,34 @@
 #define INDEX_THREAD_PRIVATE_H
 
 struct index_mailbox;
-struct mail_thread_list_context;
 
 #include "crc32.h"
-#include "mail-hash.h"
 #include "mail-thread.h"
+#include "mail-index-strmap.h"
 
 #define MAIL_THREAD_INDEX_SUFFIX ".thread"
 
+/* After initially building the index, assign first_invalid_msgid_idx to
+   the next unused index + SKIP_COUNT. When more messages are added and
+   the next valid msgid conflicts with the first invalid msgid, the invalid
+   msgids will be moved forward again this many indexes. */
+#define THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT \
+	(4096 / sizeof(struct mail_thread_node))
+
 #define HDR_MESSAGE_ID "message-id"
 #define HDR_IN_REPLY_TO "in-reply-to"
 #define HDR_REFERENCES "references"
 #define HDR_SUBJECT "subject"
 
-struct msgid_search_key {
-	const char *msgid;
-	uint32_t msgid_crc32;
-};
+#define MAIL_THREAD_NODE_REF_MSGID 0
+#define MAIL_THREAD_NODE_REF_INREPLYTO 1
+#define MAIL_THREAD_NODE_REF_REFERENCES1 2
 
 struct mail_thread_node {
-	struct mail_hash_record rec;
-
-	/* CRC32 checksum of this node's message ID (except CRC32 0 -> 1).
-	   If there are more nodes with the same checksum, use uid and
-	   ref_index fields to find the original string. */
-	uint32_t msgid_crc32;
-	/* ref_index=0: ID in external thread IDs list (beginning from 1)
-	   ref_index=1: UID of the mail whose Message-ID: header points to
-	     this node.
-	   ref_index>1: UID of some message which has references to this
-	     node. ref_index spcifies how the reference string is found. */
-	uint32_t uid_or_id;
-	/* Index for this node's parent node */
-	unsigned int parent_idx:27;
-	/* 0 = uid contains the index to the wanted Message ID in external
-	       thread IDs list
-	   1 = Message-ID: header's first valid ID
-	   2 = In-Reply-To: header's first valid ID
-	   3 = References: header's last ID with the same CRC32
-	   4 = References: header's second last ID with the same CRC32
-	     (i.e. one non-matching ID already had a CRC32 collision)
-	   ..
-	   31 = References: header's 29th last ID with the same CRC32
-	*/
-	unsigned int ref_index:5;
+	/* UID of the message, or 0 for dummy nodes */
+	uint32_t uid;
+	/* Index for this node's parent node, 0 = this is root */
+	uint32_t parent_idx;
 	/* Number of messages containing "this message" -> "parent message"
 	   link, i.e. "number of links to parent node". However since parents
 	   can change, not all of these references might be from our current
@@ -56,34 +40,24 @@
 	unsigned int expunge_rebuilds:1;
 	/* If a link between this node and its child gets unreferenced,
 	   rebuild the thread tree. */
-	unsigned int parent_unref_rebuilds:1;
+	unsigned int child_unref_rebuilds:1;
 };
-#define MAIL_INDEX_NODE_REF_EXT 0
-#define MAIL_INDEX_NODE_REF_MSGID 1
-#define MAIL_INDEX_NODE_REF_INREPLYTO 2
-#define MAIL_INDEX_NODE_REF_REFERENCES_LAST 3
-#define MAIL_INDEX_NODE_REF_MAX_VALUE 31
-
-#define MAIL_INDEX_NODE_EXISTS(node) \
-	((node)->ref_index == MAIL_INDEX_NODE_REF_MSGID)
-
-struct mail_thread_update_context {
-	struct mail *tmp_mail;
+ARRAY_DEFINE_TYPE(mail_thread_node, struct mail_thread_node);
+#define MAIL_THREAD_NODE_EXISTS(node) \
+	((node)->uid != 0)
 
-	struct mail_hash *hash;
-	struct mail_hash_transaction *hash_trans;
-	struct mail_thread_list_update_context *thread_list_ctx;
+struct mail_thread_cache {
+	uint32_t last_uid;
+	/* indexes used for invalid Message-IDs. that means no other messages
+	   point to them and they can safely be moved around whenever
+	   necessary. */
+	uint32_t first_invalid_msgid_str_idx;
+	uint32_t next_invalid_msgid_str_idx;
 
-	/* Hash record idx -> Message-ID */
-	ARRAY_DEFINE(msgid_cache, const char *);
-	pool_t msgid_pool;
+	struct mail_search_result *search_result;
 
-	unsigned int cmp_match_count;
-	uint32_t cmp_last_idx;
-
-	unsigned int failed:1;
-	unsigned int rebuild:1;
-	unsigned int syncing:1;
+	/* indexed by mail_index_strmap_rec.str_idx */
+	ARRAY_TYPE(mail_thread_node) thread_nodes;
 };
 
 static inline uint32_t crc32_str_nonzero(const char *str)
@@ -92,32 +66,19 @@
 	return value == 0 ? 1 : value;
 }
 
-int mail_thread_add(struct mail_thread_update_context *ctx, struct mail *mail);
-int mail_thread_remove(struct mail_thread_update_context *ctx, uint32_t seq);
+void mail_thread_add(struct mail_thread_cache *cache,
+		     const struct mail_index_strmap_rec *msgid_map,
+		     unsigned int *msgid_map_idx);
+bool mail_thread_remove(struct mail_thread_cache *cache,
+			const struct mail_index_strmap_rec *msgid_map,
+			unsigned int *msgid_map_idx);
 
 struct mail_thread_iterate_context *
-mail_thread_iterate_init_full(struct mail *tmp_mail,
-			      struct mail_hash_transaction *hash_trans,
-			      struct mail_thread_list_context *hash_list_ctx,
+mail_thread_iterate_init_full(struct mail_thread_cache *cache,
+			      struct mail *tmp_mail,
 			      enum mail_thread_type thread_type,
 			      bool return_seqs);
 
 void index_thread_mailbox_index_opened(struct index_mailbox *ibox);
 
-struct mail_thread_list_context *
-mail_thread_list_init(struct mailbox *box);
-void mail_thread_list_deinit(struct mail_thread_list_context **ctx);
-
-struct mail_thread_list_update_context *
-mail_thread_list_update_begin(struct mail_thread_list_context *ctx,
-			      struct mail_hash_transaction *hash_trans);
-int mail_thread_list_lookup(struct mail_thread_list_update_context *ctx,
-			    uint32_t id, const char **msgid_r);
-uint32_t mail_thread_list_add(struct mail_thread_list_update_context *ctx,
-			      const char *msgid);
-void mail_thread_list_remove(struct mail_thread_list_update_context *ctx,
-			     uint32_t id);
-int mail_thread_list_commit(struct mail_thread_list_update_context **ctx);
-void mail_thread_list_rollback(struct mail_thread_list_update_context **ctx);
-
 #endif
--- a/src/lib-storage/index/index-thread.c	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/index/index-thread.c	Mon Sep 01 15:17:00 2008 +0300
@@ -4,41 +4,48 @@
 
 #include "lib.h"
 #include "array.h"
+#include "bsearch-insert-pos.h"
+#include "hash2.h"
 #include "message-id.h"
 #include "mail-index-private.h"
 #include "mail-index-sync-private.h"
 #include "mail-search.h"
 #include "mail-search-build.h"
+#include "mailbox-search-result-private.h"
 #include "index-storage.h"
 #include "index-thread-private.h"
 
+#include <stdlib.h>
+
 #define MAIL_THREAD_CONTEXT(obj) \
 	MODULE_CONTEXT(obj, mail_thread_storage_module)
 
-/* how much memory to allocate initially. these are very rough
-   approximations. */
-#define APPROX_MSG_EXTRA_COUNT 10
-#define APPROX_MSGID_SIZE 45
-
 struct mail_thread_context {
-	struct mail_thread_update_context thread_ctx;
-
 	struct mailbox *box;
 	struct mailbox_transaction_context *t;
+	struct mail_index_strmap_view_sync *strmap_sync;
 
-	struct mail_search_context *search;
+	struct mail *tmp_mail;
 	struct mail_search_args *search_args;
-	struct mail_search_arg tmp_search_arg;
+	ARRAY_TYPE(seq_range) added_uids;
+
+	unsigned int failed:1;
 };
 
 struct mail_thread_mailbox {
 	union mailbox_module_context module_ctx;
-	struct mail_hash *hash;
+
+	unsigned int next_msgid_idx;
+	struct mail_thread_cache *cache;
 
-	struct mail_thread_list_context *list_ctx;
+	struct mail_index_strmap *strmap;
+	struct mail_index_strmap_view *strmap_view;
+	/* sorted by UID, ref_index */
+	const ARRAY_TYPE(mail_index_strmap_rec) *msgid_map;
+	const struct hash2_table *msgid_hash;
 
 	/* set only temporarily while needed */
-	struct mail_thread_update_context *ctx;
+	struct mail_thread_context *ctx;
 };
 
 static MODULE_CONTEXT_DEFINE_INIT(mail_thread_storage_module,
@@ -57,402 +64,481 @@
 	return TRUE;
 }
 
-static unsigned int mail_thread_hash_key(const void *key)
-{
-	const struct msgid_search_key *key_rec = key;
-
-	return key_rec->msgid_crc32;
-}
-
-static const char *
-mail_thread_nth_last_refid_crc32(const char *msgids, unsigned int ref_index,
-				 uint32_t msgid_crc32)
+static int
+mail_strmap_rec_get_msgid(struct mail_thread_context *ctx,
+			  const struct mail_index_strmap_rec *rec,
+			  const char **msgid_r)
 {
-	const unsigned int idx_from_end =
-		ref_index - MAIL_INDEX_NODE_REF_REFERENCES_LAST + 1;
-	const char **last_msgids, *msgid;
-	unsigned int idx = 0;
-
-	last_msgids = t_new(const char *, idx_from_end);
-	while ((msgid = message_id_get_next(&msgids)) != NULL) {
-		if (crc32_str_nonzero(msgid) == msgid_crc32) {
-			last_msgids[idx % idx_from_end] = msgid;
-			idx++;
-		}
-	}
-	return last_msgids[idx % idx_from_end];
-}
-
-static const char *
-mail_thread_node_get_msgid(struct mail_hash_transaction *trans,
-			   struct mail_thread_update_context *ctx,
-			   const struct mail_thread_node *node, uint32_t idx)
-{
-	const char *msgids, *msgid = NULL, **p;
+	const char *orig_msgids;
+	const char *msgids = NULL, *msgid;
+	unsigned int n = 0;
 	int ret;
 
-	if (node->ref_index == MAIL_INDEX_NODE_REF_EXT) {
-		if (mail_thread_list_lookup(ctx->thread_list_ctx,
-					    node->uid_or_id, &msgid) < 0)
-			return NULL;
-		if (msgid == NULL) {
-			mail_hash_transaction_set_corrupted(trans,
-				"Referenced list msgid lost");
-		}
-		return msgid;
-	}
-
-	p = array_idx_modifiable(&ctx->msgid_cache, idx);
-	if (*p != NULL)
-		return *p;
+	ret = mail_set_uid(ctx->tmp_mail, rec->uid);
+	if (ret <= 0)
+		return ret;
 
-	ret = mail_set_uid(ctx->tmp_mail, node->uid_or_id);
-	if (ret <= 0) {
-		if (ret == 0) {
-			mail_hash_transaction_set_corrupted(trans,
-				t_strdup_printf("Referenced UID %u lost",
-						node->uid_or_id));
-		}
-		return NULL;
-	}
-	switch (node->ref_index) {
-	case MAIL_INDEX_NODE_REF_MSGID:
+	switch (rec->ref_index) {
+	case MAIL_THREAD_NODE_REF_MSGID:
 		/* Message-ID: header */
-		if (mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID,
-					  &msgids) < 0)
-			return NULL;
-		msgid = message_id_get_next(&msgids);
+		ret = mail_get_first_header(ctx->tmp_mail, HDR_MESSAGE_ID,
+					    &msgids);
 		break;
-	case MAIL_INDEX_NODE_REF_INREPLYTO:
+	case MAIL_THREAD_NODE_REF_INREPLYTO:
 		/* In-Reply-To: header */
-		if (mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
-					  &msgids) < 0)
-			return NULL;
-		msgid = message_id_get_next(&msgids);
+		ret = mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO,
+					    &msgids);
 		break;
 	default:
 		/* References: header */
-		if (mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
-					  &msgids) < 0)
-			return NULL;
-		msgid = mail_thread_nth_last_refid_crc32(msgids,
-							 node->ref_index,
-							 node->msgid_crc32);
+		ret = mail_get_first_header(ctx->tmp_mail, HDR_REFERENCES,
+					    &msgids);
+		n = rec->ref_index - MAIL_THREAD_NODE_REF_REFERENCES1;
 		break;
 	}
 
+	if (ret < 0) {
+		if (ctx->tmp_mail->expunged) {
+			/* treat it as if it didn't exist. trying to add it
+			   again will result in failure. */
+			return 0;
+		}
+		return -1;
+	}
+
+	/* get the nth message-id */
+	orig_msgids = msgids;
+	msgid = message_id_get_next(&msgids);
+	if (msgid != NULL) {
+		for (; n > 0 && *msgids != '\0'; n--)
+			msgid = message_id_get_next(&msgids);
+	}
+
 	if (msgid == NULL) {
 		/* shouldn't have happened */
-		mail_hash_transaction_set_corrupted(trans, "Message ID lost");
-		return NULL;
+		mail_storage_set_critical(ctx->box->storage,
+					  "Threading lost Message ID");
+		return -1;
 	}
-
-	*p = p_strdup(ctx->msgid_pool, msgid);
-	return *p;
+	*msgid_r = msgid;
+	return 1;
 }
 
-static bool mail_thread_hash_cmp(struct mail_hash_transaction *trans,
-				 const void *key, uint32_t idx, void *context)
+static bool
+mail_thread_hash_key_cmp(const char *key,
+			 const struct mail_index_strmap_rec *rec,
+			 void *context)
 {
-	const struct msgid_search_key *key_rec = key;
 	struct mail_thread_mailbox *tbox = context;
-	struct mail_thread_update_context *ctx = tbox->ctx;
-	const struct mail_thread_node *node;
+	struct mail_thread_context *ctx = tbox->ctx;
 	const char *msgid;
-	bool ret;
-
-	node = mail_hash_lookup_idx(trans, idx);
-	if (key_rec->msgid_crc32 != node->msgid_crc32)
-		return FALSE;
-
-	ctx->cmp_match_count++;
-	ctx->cmp_last_idx = idx;
+	bool cmp_ret;
+	int ret;
 
 	/* either a match or a collision, need to look closer */
 	T_BEGIN {
-		msgid = mail_thread_node_get_msgid(trans, ctx, node, idx);
-		if (msgid != NULL)
-			ret = strcmp(msgid, key_rec->msgid) == 0;
-		else {
-			/* we couldn't figure out the Message-ID for whatever
-			   reason. we'll need to fallback to rebuilding the
-			   whole thread. */
-			ctx->rebuild = TRUE;
-			ret = FALSE;
+		ret = mail_strmap_rec_get_msgid(ctx, rec, &msgid);
+		if (ret <= 0) {
+			if (ret < 0)
+				ctx->failed = TRUE;
+			cmp_ret = FALSE;
+		} else {
+			cmp_ret = strcmp(msgid, key) == 0;
 		}
 	} T_END;
+	return cmp_ret;
+}
+
+static int
+mail_thread_hash_rec_cmp(const struct mail_index_strmap_rec *rec1,
+			 const struct mail_index_strmap_rec *rec2,
+			 void *context)
+{
+	struct mail_thread_mailbox *tbox = context;
+	struct mail_thread_context *ctx = tbox->ctx;
+	const char *msgid1, *msgid2;
+	int ret;
+
+	T_BEGIN {
+		ret = mail_strmap_rec_get_msgid(ctx, rec1, &msgid1);
+		if (ret > 0) {
+			msgid1 = t_strdup(msgid1);
+			ret = mail_strmap_rec_get_msgid(ctx, rec2, &msgid2);
+		}
+		ret = ret <= 0 ? -1 :
+			strcmp(msgid1, msgid2) == 0;
+	} T_END;
 	return ret;
 }
 
-static unsigned int mail_thread_hash_rec(const void *p)
+static void mail_thread_strmap_remap(const uint32_t *idx_map,
+				     unsigned int old_count,
+				     unsigned int new_count, void *context)
 {
-	const struct mail_thread_node *rec = p;
+	struct mail_thread_mailbox *tbox = context;
+	struct mail_thread_cache *cache = tbox->cache;
+	ARRAY_TYPE(mail_thread_node) new_nodes;
+	const struct mail_thread_node *old_nodes;
+	struct mail_thread_node *node;
+	unsigned int i, nodes_count, max, new_first_invalid, invalid_count;
+
+	if (cache->search_result == NULL)
+		return;
+
+	if (new_count == 0) {
+		/* strmap was reset, we'll need to rebuild thread */
+		mailbox_search_result_free(&cache->search_result);
+		return;
+	}
+
+	invalid_count = cache->next_invalid_msgid_str_idx -
+		cache->first_invalid_msgid_str_idx;
+
+	old_nodes = array_get(&cache->thread_nodes, &nodes_count);
+	i_array_init(&new_nodes, new_count + invalid_count + 32);
 
-	return rec->msgid_crc32;
+	/* renumber existing valid nodes. all existing records in old_nodes
+	   should also exist in idx_map since we've removed expunged messages
+	   from the cache before committing the sync. */
+	max = I_MIN(I_MIN(old_count, nodes_count),
+		    cache->first_invalid_msgid_str_idx);
+	for (i = 0; i < max; i++) {
+		if (idx_map[i] == 0) {
+			/* expunged record. */
+			i_assert(old_nodes[i].uid == 0);
+		} else {
+			node = array_idx_modifiable(&new_nodes, idx_map[i]);
+			*node = old_nodes[i];
+			if (node->parent_idx != 0) {
+				node->parent_idx = idx_map[node->parent_idx];
+				i_assert(node->parent_idx != 0);
+			}
+		}
+	}
+
+	/* copy invalid nodes, if any. no other messages point to them,
+	   so this is safe. */
+	new_first_invalid = new_count + 1 +
+		THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+	array_copy(&new_nodes.arr, new_first_invalid, &cache->thread_nodes.arr,
+		   cache->first_invalid_msgid_str_idx, invalid_count);
+	cache->first_invalid_msgid_str_idx = new_first_invalid;
+	cache->next_invalid_msgid_str_idx = new_first_invalid + invalid_count;
+
+	/* replace the old nodes with the renumbered ones */
+	array_free(&cache->thread_nodes);
+	cache->thread_nodes = new_nodes;
+}
+
+static int thread_get_mail_header(struct mail *mail, const char *name,
+				  const char **value_r)
+{
+	if (mail_get_first_header(mail, name, value_r) < 0) {
+		if (!mail->expunged)
+			return -1;
+
+		/* Message is expunged. Instead of failing the entire THREAD
+		   command, just treat the header as non-existing. */
+		*value_r = NULL;
+	}
+	return 0;
 }
 
 static int
-mail_thread_hash_remap(struct mail_hash_transaction *trans,
-		       const uint32_t *map, unsigned int map_size,
-		       void *context ATTR_UNUSED)
+mail_thread_map_add_mail(struct mail_thread_context *ctx, struct mail *mail)
 {
-	const struct mail_hash_header *hdr;
-	struct mail_thread_node *node;
-	uint32_t idx;
+	const char *message_id, *in_reply_to, *references, *msgid;
+	uint32_t ref_index;
+
+	if (thread_get_mail_header(mail, HDR_MESSAGE_ID, &message_id) < 0 ||
+	    thread_get_mail_header(mail, HDR_REFERENCES, &references) < 0)
+		return -1;
+
+	/* add Message-ID: */
+	msgid = message_id_get_next(&message_id);
+	if (msgid != NULL) {
+		mail_index_strmap_view_sync_add(ctx->strmap_sync, mail->uid,
+						MAIL_THREAD_NODE_REF_MSGID,
+						msgid);
+	} else {
+		mail_index_strmap_view_sync_add_unique(ctx->strmap_sync,
+					mail->uid, MAIL_THREAD_NODE_REF_MSGID);
+	}
 
-	hdr = mail_hash_get_header(trans);
-	for (idx = 1; idx < hdr->record_count; idx++) {
-		node = mail_hash_lookup_idx(trans, idx);
-		i_assert(!MAIL_HASH_RECORD_IS_DELETED(&node->rec));
+	/* add References: if there are any valid ones */
+	msgid = message_id_get_next(&references);
+	if (msgid != NULL) {
+		ref_index = MAIL_THREAD_NODE_REF_REFERENCES1;
+		do {
+			mail_index_strmap_view_sync_add(ctx->strmap_sync,
+							mail->uid,
+							ref_index, msgid);
+			ref_index++;
+			msgid = message_id_get_next(&references);
+		} while (msgid != NULL);
+	} else {
+		/* no References:, use In-Reply-To: */
+		if (thread_get_mail_header(mail, HDR_IN_REPLY_TO,
+					   &in_reply_to) < 0)
+			return -1;
 
-		if (node->parent_idx >= map_size) {
-			mail_hash_transaction_set_corrupted(trans,
-				"invalid parent_idx");
-			return -1;
+		msgid = message_id_get_next(&in_reply_to);
+		if (msgid != NULL) {
+			mail_index_strmap_view_sync_add(ctx->strmap_sync,
+				mail->uid, MAIL_THREAD_NODE_REF_INREPLYTO,
+				msgid);
 		}
-
-		node->parent_idx = map[node->parent_idx];
+	}
+	if (ctx->failed) {
+		/* message-id lookup failed in hash compare */
+		return -1;
 	}
 	return 0;
 }
 
-static bool
-mail_thread_try_use_hash(struct mail_thread_context *ctx,
-			 struct mail_hash *hash,
-			 const struct mailbox_status *status, bool reset)
+static int mail_thread_index_map_build(struct mail_thread_context *ctx)
 {
-	struct mail_search_args *search_args = ctx->search_args;
-	struct mail_search_arg *limit_arg = NULL;
-	const struct mail_hash_header *hdr;
-	struct mail_hash_transaction *hash_trans;
-	uint32_t last_seq, last_uid, seq1, seq2;
-	bool can_use = TRUE, shared_lock = FALSE;
-	int try, ret;
-
-	last_seq = status->messages;
-	last_uid = status->uidnext - 1;
+	static const char *wanted_headers[] = {
+		HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES,
+		NULL
+	};
+	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
+	struct mailbox_header_lookup_ctx *headers_ctx;
+	struct mail_search_args *search_args;
+	struct mail_search_context *search_ctx;
+	struct mail *mail;
+	uint32_t last_uid, seq1, seq2;
+	int ret = 0;
 
-	/* Each search condition requires their own separate thread index.
-	   Pretty much all the clients use only "search all" threading, so
-	   we don't need to worry about anything else. */
-	if (search_args->args->next != NULL) {
-		/* too difficult to figure out if we could optimize this.
-		   we most likely couldn't. */
-		return FALSE;
-	} else if (search_args->args->type == SEARCH_ALL) {
-		/* optimize */
-	} else if (search_args->args->type == SEARCH_SEQSET) {
-		const struct seq_range *range;
-		unsigned int count;
+	if (tbox->strmap_view == NULL) {
+		/* first time we're threading this mailbox */
+		struct index_mailbox *ibox = (struct index_mailbox *)ctx->box;
 
-		range = array_get(&search_args->args->value.seqset, &count);
-		if (count == 1 && range[0].seq1 == 1) {
-			/* If we're searching 1..n, we might be able to
-			   optimize this. This is at least useful for testing
-			   incremental index updates if nothing else. :) */
-			last_seq = range[0].seq2;
-			last_uid = 0;
-		} else {
-			return FALSE;
-		}
-	} else {
-		return FALSE;
+		tbox->strmap_view =
+			mail_index_strmap_view_open(tbox->strmap, ibox->view,
+						    mail_thread_hash_key_cmp,
+						    mail_thread_hash_rec_cmp,
+						    mail_thread_strmap_remap,
+						    tbox, &tbox->msgid_map,
+						    &tbox->msgid_hash);
 	}
 
-	for (try = 0;; try++) {
-		if ((ret = mail_hash_lock_shared(hash)) < 0)
-			return FALSE;
-		if (ret > 0)
-			break;
-		if (try == 5) {
-			/* enough tries */
-			return FALSE;
-		}
+	ctx->t = mailbox_transaction_begin(ctx->box, 0);
+	headers_ctx = mailbox_header_lookup_init(ctx->box, wanted_headers);
+	ctx->tmp_mail = mail_alloc(ctx->t, 0, headers_ctx);
 
-		/* doesn't exist, create a new hash */
-		if ((ret = mail_hash_create_excl_locked(hash)) < 0)
-			return FALSE;
-		if (ret > 0) {
-			ctx->thread_ctx.hash_trans =
-				mail_hash_transaction_begin(hash,
-							    status->messages);
-			return TRUE;
-		}
+	/* add all missing UIDs */
+	ctx->strmap_sync = mail_index_strmap_view_sync_init(tbox->strmap_view,
+							    &last_uid);
+	mailbox_get_seq_range(ctx->box, last_uid + 1, (uint32_t)-1,
+			      &seq1, &seq2);
+	if (seq1 == 0) {
+		/* nothing is missing */
+		mailbox_header_lookup_unref(&headers_ctx);
+		return 0;
 	}
 
-again:
-	hash_trans = mail_hash_transaction_begin(hash, status->messages);
-	hdr = mail_hash_get_header(hash_trans);
-	if (reset)
-		mail_hash_reset(hash_trans);
-	else if (hdr->last_uid > last_uid) {
-		/* thread index is newer than our current mailbox view,
-		   can't optimize */
-		can_use = FALSE;
-	} else if (hdr->message_count > last_seq) {
-		/* messages have been expunged, but not removed from
-		   the thread index. we don't know their Message-IDs
-		   anymore, so we have to rebuild the index. */
-		mail_hash_reset(hash_trans);
-	} else if (hdr->message_count > 0) {
-		/* non-empty hash. add only the new messages in there. */
-		mailbox_get_seq_range(ctx->box, 1, hdr->last_uid, &seq1, &seq2);
+	search_args = mail_search_build_init();
+	mail_search_build_add_seqset(search_args, seq1, seq2);
+	search_ctx = mailbox_search_init(ctx->t, search_args, NULL);
+
+	mail = mail_alloc(ctx->t, 0, headers_ctx);
+	mailbox_header_lookup_unref(&headers_ctx);
+
+	while (mailbox_search_next(search_ctx, mail) > 0) {
+		if (mail_thread_map_add_mail(ctx, mail) < 0) {
+			ret = -1;
+			break;
+		}
+	}
+	mail_free(&mail);
+	if (mailbox_search_deinit(&search_ctx) < 0)
+		ret = -1;
 
-		if (seq2 != hdr->message_count ||
-		    hdr->uid_validity != status->uidvalidity) {
-			/* some messages have been expunged. have to rebuild. */
-			mail_hash_reset(hash_trans);
-		} else {
-			/* after all these checks, this is the only case we
-			   can actually optimize. */
-			struct mail_search_arg *arg = &ctx->tmp_search_arg;
+	if (ret < 0)
+		mail_index_strmap_view_sync_rollback(&ctx->strmap_sync);
+	else
+		mail_index_strmap_view_sync_commit(&ctx->strmap_sync);
+	return ret;
+}
+
+static int msgid_map_cmp(const void *key, const void *value)
+{
+	const uint32_t *uid = key;
+	const struct mail_index_strmap_rec *rec = value;
+
+	return *uid < rec->uid ? -1 :
+		(*uid > rec->uid ? 1 : 0);
+}
+
+static bool mail_thread_cache_update_removes(struct mail_thread_mailbox *tbox,
+					     ARRAY_TYPE(seq_range) *added_uids)
+{
+	struct mail_thread_cache *cache = tbox->cache;
+	ARRAY_TYPE(seq_range) removed_uids;
+	const struct seq_range *uids;
+	const struct mail_index_strmap_rec *msgid_map;
+	unsigned int i, j, idx, map_count, uid_count;
+	uint32_t uid;
+
+	t_array_init(&removed_uids, 64);
+	mailbox_search_result_sync(cache->search_result,
+				   &removed_uids, added_uids);
 
-			arg->type = SEARCH_SEQSET;
-			p_array_init(&arg->value.seqset, search_args->pool, 1);
-			if (seq2 == last_seq) {
-				/* no need to update the index,
-				   search nothing */
-				shared_lock = TRUE;
-			} else {
-				/* search next+1..n */
-				seq_range_array_add_range(&arg->value.seqset,
-							  seq2 + 1, last_seq);
-			}
-			limit_arg = &ctx->tmp_search_arg;
+	/* first check that we're not inserting any messages in the middle */
+	uids = array_get(added_uids, &uid_count);
+	if (uid_count > 0 && uids[0].seq1 <= cache->last_uid)
+		return FALSE;
+
+	/* next remove messages so we'll see early if we have to rebuild.
+	   we expect to find all removed UIDs from msgid_map that are <= max
+	   UID in msgid_map */
+	msgid_map = array_get(tbox->msgid_map, &map_count);
+	uids = array_get(&removed_uids, &uid_count);
+	for (i = j = 0; i < uid_count; i++) {
+		/* find and remove from the map */
+		bsearch_insert_pos(&uids[i].seq1, &msgid_map[j], map_count - j,
+				   sizeof(*msgid_map), msgid_map_cmp, &idx);
+		j += idx;
+		if (j == map_count) {
+			/* all removals after this are about messages we never
+			   even added to the cache */
+			i_assert(uids[i].seq1 > cache->last_uid);
+			break;
 		}
-	} else {
-		/* empty hash - make sure anyway that it gets reset */
-		mail_hash_reset(hash_trans);
-	}
+		while (j > 0 && msgid_map[j-1].uid == msgid_map[j].uid)
+			j--;
 
-	if (can_use && !shared_lock) {
-		mail_hash_transaction_end(&hash_trans);
-		mail_hash_unlock(hash);
-		if (mail_hash_lock_exclusive(hash,
-				MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0)
-			return FALSE;
-		shared_lock = TRUE;
-		limit_arg = NULL;
-		goto again;
+		/* remove the messages from cache */
+		for (uid = uids[i].seq1; uid <= uids[i].seq2; uid++) {
+			i_assert(msgid_map[j].uid == uid);
+			if (!mail_thread_remove(cache, msgid_map + j, &j))
+				return FALSE;
+		}
 	}
-	if (!can_use) {
-		mail_hash_transaction_end(&hash_trans);
-		mail_hash_unlock(hash);
-		return FALSE;
-	} else {
-		ctx->thread_ctx.hash_trans = hash_trans;
-		if (limit_arg != NULL) {
-			limit_arg->next = search_args->args;
-			search_args->args = limit_arg;
+	return TRUE;
+}
+
+static void mail_thread_cache_update_adds(struct mail_thread_mailbox *tbox,
+					  ARRAY_TYPE(seq_range) *added_uids)
+{
+	struct mail_thread_cache *cache = tbox->cache;
+	const struct seq_range *uids;
+	const struct mail_index_strmap_rec *msgid_map;
+	unsigned int i, j, map_count, uid_count;
+	uint32_t uid;
+
+	/* everything removed successfully, add the new messages. all of them
+	   should already be in msgid_map. */
+	msgid_map = array_get(tbox->msgid_map, &map_count);
+	uids = array_get(added_uids, &uid_count);
+	i_assert(uid_count == 0 || msgid_map[j].uid <= uids[0].seq1);
+	for (i = 0; i < uid_count; i++) {
+		for (uid = uids[i].seq1; uid <= uids[i].seq2; uid++) {
+			while (j < map_count && msgid_map[j].uid < uid)
+				j++;
+			i_assert(j < map_count && msgid_map[j].uid == uid);
+			mail_thread_add(cache, msgid_map+j, &j);
 		}
-		return TRUE;
 	}
 }
 
 static void
-mail_thread_update_init(struct mail_thread_context *ctx, bool reset)
+mail_thread_cache_fix_invalid_indexes(struct mail_thread_mailbox *tbox)
 {
-	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
-	struct mail_hash *hash = NULL;
-	struct mailbox_status status;
-	const struct mail_hash_header *hdr;
-	unsigned int count;
+	struct mail_thread_cache *cache = tbox->cache;
+	uint32_t highest_idx, new_first_idx, count;
 
-	mailbox_get_status(ctx->box, STATUS_MESSAGES | STATUS_UIDNEXT, &status);
-	if (mail_thread_try_use_hash(ctx, tbox->hash, &status, reset))
-		hash = tbox->hash;
-	else {
-		/* fallback to using in-memory hash */
-		struct index_mailbox *ibox = (struct index_mailbox *)ctx->box;
+	highest_idx = mail_index_strmap_view_get_highest_idx(tbox->strmap_view);
+	new_first_idx = highest_idx + 1 +
+		THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+	count = cache->next_invalid_msgid_str_idx -
+		cache->first_invalid_msgid_str_idx;
 
-		hash = mail_hash_alloc(ibox->index, NULL,
-				       sizeof(struct mail_thread_node),
-				       mail_thread_hash_key,
-				       mail_thread_hash_rec,
-				       mail_thread_hash_cmp,
-				       mail_thread_hash_remap,
-				       tbox);
-		if (mail_hash_lock_exclusive(hash,
-				MAIL_HASH_LOCK_FLAG_CREATE_MISSING) <= 0)
-			i_unreached();
-		ctx->thread_ctx.hash_trans =
-			mail_hash_transaction_begin(hash, 0);
+	if (count == 0) {
+		/* there are no invalid indexes yet, we can update the first
+		   invalid index position to delay conflicts. */
+		cache->first_invalid_msgid_str_idx =
+			cache->next_invalid_msgid_str_idx = new_first_idx;
+	} else if (highest_idx >= cache->first_invalid_msgid_str_idx) {
+		/* conflict - move the invalid indexes forward */
+		array_copy(&cache->thread_nodes.arr, new_first_idx,
+			   &cache->thread_nodes.arr,
+			   cache->first_invalid_msgid_str_idx, count);
+		cache->first_invalid_msgid_str_idx = new_first_idx;
+		cache->next_invalid_msgid_str_idx = new_first_idx + count;
 	}
-	ctx->thread_ctx.hash = hash;
+}
 
-	/* initialize searching */
-	ctx->t = mailbox_transaction_begin(ctx->box, 0);
-	ctx->search = mailbox_search_init(ctx->t, ctx->search_args, NULL);
-	ctx->thread_ctx.tmp_mail = mail_alloc(ctx->t, 0, NULL);
-	ctx->thread_ctx.thread_list_ctx =
-		mail_thread_list_update_begin(tbox->list_ctx,
-					      ctx->thread_ctx.hash_trans);
+static void mail_thread_cache_sync_remove(struct mail_thread_mailbox *tbox,
+					  struct mail_thread_context *ctx)
+{
+	struct mail_thread_cache *cache = tbox->cache;
+
+	if (cache->search_result == NULL)
+		return;
 
-	hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans);
-	count = status.messages < hdr->record_count ? 0 :
-		status.messages - hdr->record_count;
-	count += APPROX_MSG_EXTRA_COUNT;
-	ctx->thread_ctx.msgid_pool =
-		pool_alloconly_create(MEMPOOL_GROWING"msgids",
-				      count * APPROX_MSGID_SIZE);
-	i_array_init(&ctx->thread_ctx.msgid_cache,
-		     I_MAX(hdr->record_count, status.messages));
+	if (mail_search_args_equal(ctx->search_args,
+				   cache->search_result->search_args)) {
+		t_array_init(&ctx->added_uids, 64);
+		if (mail_thread_cache_update_removes(tbox, &ctx->added_uids)) {
+			/* successfully updated the cache */
+			return;
+		}
+	}
+	/* failed to use the cache, rebuild */
+	mailbox_search_result_free(&cache->search_result);
 }
 
-static int
-mail_thread_update(struct mail_thread_context *ctx, bool reset)
+static int mail_thread_cache_sync_add(struct mail_thread_mailbox *tbox,
+				      struct mail_thread_context *ctx)
 {
-	static const char *wanted_headers[] = {
-		HDR_MESSAGE_ID, HDR_IN_REPLY_TO, HDR_REFERENCES, HDR_SUBJECT,
-		NULL
-	};
-	struct mailbox *box;
-	struct mailbox_header_lookup_ctx *headers_ctx;
-	struct mail_hash_header *hdr;
+	struct mail_thread_cache *cache = tbox->cache;
+	struct mail_search_context *search_ctx;
 	struct mail *mail;
-	bool changed = FALSE;
-	uint32_t prev_uid;
-	int ret = 0;
+	const struct mail_index_strmap_rec *msgid_map;
+	unsigned int i, count;
+
+	mail_thread_cache_fix_invalid_indexes(tbox);
+
+	if (cache->search_result != NULL) {
+		/* we already checked at sync_remove that we can use this
+		   search result. */
+		mail_thread_cache_update_adds(tbox, &ctx->added_uids);
+		return 0;
+	}
 
-	mail_thread_update_init(ctx, reset);
-	box = mailbox_transaction_get_mailbox(ctx->t);
+	cache->last_uid = 0;
+	cache->first_invalid_msgid_str_idx = cache->next_invalid_msgid_str_idx =
+		mail_index_strmap_view_get_highest_idx(tbox->strmap_view) + 1 +
+		THREAD_INVALID_MSGID_STR_IDX_SKIP_COUNT;
+	array_clear(&cache->thread_nodes);
 
-	hdr = mail_hash_get_header(ctx->thread_ctx.hash_trans);
-	prev_uid = hdr->last_uid;
+	search_ctx = mailbox_search_init(ctx->t, ctx->search_args, NULL);
+	mail = mail_alloc(ctx->t, 0, NULL);
 
-	headers_ctx = mailbox_header_lookup_init(box, wanted_headers);
-	mail = mail_alloc(ctx->t, MAIL_FETCH_DATE, headers_ctx);
-	while (ret == 0 &&
-	       mailbox_search_next(ctx->search, mail) > 0) {
-		i_assert(mail->uid > prev_uid);
-		prev_uid = mail->uid;
-		changed = TRUE;
+	cache->search_result =
+		mailbox_search_result_save(search_ctx,
+			MAILBOX_SEARCH_RESULT_FLAG_UPDATE |
+			MAILBOX_SEARCH_RESULT_FLAG_QUEUE_SYNC);
 
-		T_BEGIN {
-			ret = mail_thread_add(&ctx->thread_ctx, mail);
-		} T_END;
+	msgid_map = array_get(tbox->msgid_map, &count);
+	i = 0;
+	while (i < count && mailbox_search_next(search_ctx, mail) > 0) {
+		while (msgid_map[i].uid < mail->uid)
+			i++;
+		i_assert(i < count);
+		mail_thread_add(cache, msgid_map+i, &i);
 	}
 	mail_free(&mail);
-	mailbox_header_lookup_unref(&headers_ctx);
-
-	if (ret < 0 || ctx->thread_ctx.failed || ctx->thread_ctx.rebuild)
-		return -1;
-
-	if (changed) {
-		/* even if write failed, we can still finish the thread
-		   building */
-		(void)mail_hash_transaction_write(ctx->thread_ctx.hash_trans);
-	}
-	return 0;
+	return mailbox_search_deinit(&search_ctx);
 }
 
-int mail_thread_init(struct mailbox *box, bool reset,
-		     struct mail_search_args *args,
+int mail_thread_init(struct mailbox *box, struct mail_search_args *args,
 		     struct mail_thread_context **ctx_r)
 {
 	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
 	struct mail_thread_context *ctx;
-	int ret;
 
 	i_assert(tbox->ctx == NULL);
 
@@ -464,27 +550,17 @@
 	}
 
 	ctx = i_new(struct mail_thread_context, 1);
-	tbox->ctx = &ctx->thread_ctx;
+	tbox->ctx = ctx;
 	ctx->box = box;
 	ctx->search_args = args;
 
-	while ((ret = mail_thread_update(ctx, reset)) < 0) {
-		if (ctx->thread_ctx.hash != tbox->hash) {
-			/* failed with in-memory hash */
-			mail_storage_set_critical(box->storage,
-				"Threading mailbox %s failed unexpectedly",
-				box->name);
-			mail_thread_deinit(&ctx);
-			return -1;
-		}
-
-		/* try again with in-memory hash */
-		mail_thread_clear(ctx);
-		reset = TRUE;
-		memset(ctx, 0, sizeof(*ctx));
-		ctx->box = box;
-		ctx->search_args = args;
+	mail_thread_cache_sync_remove(tbox, ctx);
+	if (mail_thread_index_map_build(ctx) < 0 ||
+	    mail_thread_cache_sync_add(tbox, ctx) < 0) {
+		mail_thread_deinit(&ctx);
+		return -1;
 	}
+	memset(&ctx->added_uids, 0, sizeof(ctx->added_uids));
 
 	*ctx_r = ctx;
 	return 0;
@@ -492,26 +568,8 @@
 
 static void mail_thread_clear(struct mail_thread_context *ctx)
 {
-	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
-
-	if (ctx->thread_ctx.thread_list_ctx != NULL)
-		mail_thread_list_rollback(&ctx->thread_ctx.thread_list_ctx);
-
-	mail_hash_transaction_end(&ctx->thread_ctx.hash_trans);
-
-	(void)mailbox_search_deinit(&ctx->search);
-	mail_free(&ctx->thread_ctx.tmp_mail);
+	mail_free(&ctx->tmp_mail);
 	(void)mailbox_transaction_commit(&ctx->t);
-
-	mail_hash_unlock(ctx->thread_ctx.hash);
-	if (ctx->thread_ctx.hash != tbox->hash)
-		mail_hash_free(&ctx->thread_ctx.hash);
-
-	if (ctx->search_args->args == &ctx->tmp_search_arg)
-		ctx->search_args->args = ctx->tmp_search_arg.next;
-
-	array_free(&ctx->thread_ctx.msgid_cache);
-	pool_unref(&ctx->thread_ctx.msgid_pool);
 }
 
 void mail_thread_deinit(struct mail_thread_context **_ctx)
@@ -523,7 +581,6 @@
 
 	mail_thread_clear(ctx);
 	mail_search_args_unref(&ctx->search_args);
- 	i_assert(!tbox->ctx->syncing);
  	tbox->ctx = NULL;
 	i_free(ctx);
 }
@@ -534,127 +591,10 @@
 {
 	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(ctx->box);
 
-	return mail_thread_iterate_init_full(ctx->thread_ctx.tmp_mail,
-					     ctx->thread_ctx.hash_trans,
-					     tbox->list_ctx,
+	return mail_thread_iterate_init_full(tbox->cache, ctx->tmp_mail,
 					     thread_type, write_seqs);
 }
 
-static bool
-mail_thread_index_is_up_to_date(struct mail_thread_update_context *ctx,
-				struct mail_index_view *view)
-{
-	const struct mail_index_header *hdr;
-	struct mail_hash_header *hash_hdr;
-	uint32_t seq1, seq2;
-
-	hdr = mail_index_get_header(view);
-	hash_hdr = mail_hash_get_header(ctx->hash_trans);
-	if (hash_hdr->last_uid + 1 == hdr->next_uid) {
-		/* all messages have been added to hash */
-		return hash_hdr->message_count == hdr->messages_count;
-	}
-
-	if (!mail_index_lookup_seq_range(view, 1, hash_hdr->last_uid,
-					 &seq1, &seq2))
-		seq2 = 0;
-	return seq2 == hash_hdr->message_count;
-}
-
-static void
-mail_thread_expunge_handler_deinit(struct mail_thread_mailbox *tbox,
-				   struct mail_thread_update_context *ctx)
-{
-	struct mailbox_transaction_context *t;
-
-	t = ctx->tmp_mail->transaction;
-
-	if (ctx->failed)
-		mail_thread_list_rollback(&ctx->thread_list_ctx);
-	else {
-		if (mail_thread_list_commit(&ctx->thread_list_ctx) < 0)
-			ctx->failed = TRUE;
-	}
-	if (!ctx->failed)
-		(void)mail_hash_transaction_write(ctx->hash_trans);
-	mail_hash_transaction_end(&ctx->hash_trans);
-	mail_hash_unlock(tbox->hash);
-
-	mail_free(&ctx->tmp_mail);
-	/* don't commit. we're in the middle of syncing and
-	   this transaction isn't marked as external. */
-	(void)mailbox_transaction_rollback(&t);
-	array_free(&ctx->msgid_cache);
-	pool_unref(&ctx->msgid_pool);
-}
-
-static int
-mail_thread_expunge_handler(struct mail_index_sync_map_ctx *sync_ctx,
-			    uint32_t seq, const void *data,
-			    void **sync_context ATTR_UNUSED, void *context)
-{
-	struct mailbox *box = context;
-	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
-	struct mail_thread_update_context *ctx = tbox->ctx;
-	struct mailbox_transaction_context *t;
-	uint32_t uid;
-
-	if (data == NULL) {
-		/* deinit */
-		if (!ctx->syncing)
-			return 0;
-		if (ctx->hash != NULL)
-			mail_thread_expunge_handler_deinit(tbox, ctx);
-		i_free_and_null(tbox->ctx);
-		return 0;
-	}
-	if (ctx == NULL) {
-		/* init */
-		if (tbox->ctx != NULL) {
-			/* we're already in the middle of threading */
-			return 0;
-		}
-		tbox->ctx = ctx = i_new(struct mail_thread_update_context, 1);
-		ctx->syncing = TRUE;
-
-		/* we can't wait the lock in here or we could deadlock. */
-		if (mail_hash_lock_exclusive(tbox->hash,
-					     MAIL_HASH_LOCK_FLAG_TRY) <= 0)
-			return 0;
-
-		ctx->hash = tbox->hash;
-		ctx->hash_trans = mail_hash_transaction_begin(ctx->hash, 0);
-		ctx->thread_list_ctx =
-			mail_thread_list_update_begin(tbox->list_ctx,
-						      ctx->hash_trans);
-		ctx->msgid_pool = pool_alloconly_create(MEMPOOL_GROWING"msgids",
-							20 * APPROX_MSGID_SIZE);
-		i_array_init(&ctx->msgid_cache, 20);
-
-		t = mailbox_transaction_begin(box, 0);
-		ctx->tmp_mail = mail_alloc(t, 0, NULL);
-
-		if (!mail_thread_index_is_up_to_date(ctx, sync_ctx->view)) {
-			ctx->failed = TRUE;
-			return 0;
-		}
-	} else {
-		if (ctx->hash == NULL) {
-			/* locking had failed */
-			return 0;
-		}
-		if (!ctx->syncing || ctx->failed)
-			return 0;
-	}
-
-	T_BEGIN {
-		mail_index_lookup_uid(sync_ctx->view, seq, &uid);
-		if (mail_thread_remove(ctx, uid) <= 0)
-			ctx->failed = TRUE;
-	} T_END;
-	return 0;
-}
-
 static int mail_thread_mailbox_close(struct mailbox *box)
 {
 	struct mail_thread_mailbox *tbox = MAIL_THREAD_CONTEXT(box);
@@ -662,8 +602,14 @@
 
 	i_assert(tbox->ctx == NULL);
 
-	mail_thread_list_deinit(&tbox->list_ctx);
-	mail_hash_free(&tbox->hash);
+	if (tbox->strmap_view != NULL)
+		mail_index_strmap_view_close(&tbox->strmap_view);
+	mail_index_strmap_deinit(&tbox->strmap);
+	if (tbox->cache->search_result != NULL)
+		mailbox_search_result_free(&tbox->cache->search_result);
+	array_free(&tbox->cache->thread_nodes);
+	i_free(tbox->cache);
+
 	ret = tbox->module_ctx.super.close(box);
 	i_free(tbox);
 	return ret;
@@ -673,24 +619,17 @@
 {
 	struct mailbox *box = &ibox->box;
 	struct mail_thread_mailbox *tbox;
-	uint32_t ext_id;
 
 	tbox = i_new(struct mail_thread_mailbox, 1);
 	tbox->module_ctx.super = box->v;
 	box->v.close = mail_thread_mailbox_close;
 
-	tbox->list_ctx = mail_thread_list_init(box);
-	tbox->hash = mail_hash_alloc(ibox->index, MAIL_THREAD_INDEX_SUFFIX,
-				     sizeof(struct mail_thread_node),
-				     mail_thread_hash_key,
-				     mail_thread_hash_rec,
-				     mail_thread_hash_cmp,
-				     mail_thread_hash_remap,
-				     tbox);
+	tbox->strmap = mail_index_strmap_init(ibox->index,
+					      MAIL_THREAD_INDEX_SUFFIX);
+	tbox->next_msgid_idx = 1;
 
-	ext_id = mail_index_ext_register(ibox->index, "thread", 0, 0, 0);
-	mail_index_register_expunge_handler(ibox->index, ext_id, TRUE,
-					    mail_thread_expunge_handler, box);
+	tbox->cache = i_new(struct mail_thread_cache, 1);
+	i_array_init(&tbox->cache->thread_nodes, 128);
 
 	MODULE_CONTEXT_SET(box, mail_thread_storage_module, tbox);
 }
--- a/src/lib-storage/mail-thread.h	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/lib-storage/mail-thread.h	Mon Sep 01 15:17:00 2008 +0300
@@ -26,10 +26,8 @@
    unknown. */
 bool mail_thread_type_parse(const char *str, enum mail_thread_type *type_r);
 
-/* Build thread from given search arguments. If reset=TRUE, build a new thread
-   tree to memory even if thread index exists. args=NULL searches everything. */
-int mail_thread_init(struct mailbox *box, bool reset,
-		     struct mail_search_args *args,
+/* Build thread from given search arguments. args=NULL searches everything. */
+int mail_thread_init(struct mailbox *box, struct mail_search_args *args,
 		     struct mail_thread_context **ctx_r);
 void mail_thread_deinit(struct mail_thread_context **ctx);
 
--- a/src/util/Makefile.am	Mon Sep 01 15:11:54 2008 +0300
+++ b/src/util/Makefile.am	Mon Sep 01 15:17:00 2008 +0300
@@ -1,6 +1,14 @@
 pkglibexecdir = $(libexecdir)/dovecot
 
-pkglibexec_PROGRAMS = rawlog gdbhelper idxview listview logview maildirlock
+pkglibexec_PROGRAMS = \
+	rawlog \
+	gdbhelper \
+	idxview \
+	listview \
+	logview \
+	maildirlock \
+	threadview
+
 sbin_PROGRAMS = dovecotpw
 
 AM_CPPFLAGS = \
@@ -41,6 +49,11 @@
 maildirlock_SOURCES = \
 	maildirlock.c
 
+threadview_LDADD = \
+	../lib/liblib.a
+threadview_SOURCES = \
+	threadview.c
+
 dovecotpw_LDADD = \
 	../auth/libpassword.a \
 	../lib-ntlm/libntlm.a \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/util/threadview.c	Mon Sep 01 15:17:00 2008 +0300
@@ -0,0 +1,177 @@
+/* Copyright (c) 2007-2008 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "mmap-util.h"
+#include "mail-index-private.h"
+#include "mail-index-strmap.h"
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+static uint32_t max_likely_index;
+
+uint32_t mail_index_offset_to_uint32(uint32_t offset)
+{
+	const unsigned char *buf = (const unsigned char *) &offset;
+
+	if ((offset & 0x80808080) != 0x80808080)
+		return 0;
+
+	return (((uint32_t)buf[3] & 0x7f) << 2) |
+		(((uint32_t)buf[2] & 0x7f) << 9) |
+		(((uint32_t)buf[1] & 0x7f) << 16) |
+		(((uint32_t)buf[0] & 0x7f) << 23);
+}
+
+int mail_index_unpack_num(const uint8_t **p, const uint8_t *end,
+			  uint32_t *num_r)
+{
+	const uint8_t *c = *p;
+	uint32_t value = 0;
+	unsigned int bits = 0;
+
+	for (;;) {
+		if (unlikely(c == end)) {
+			/* we should never see EOF */
+			*num_r = 0;
+			return -1;
+		}
+
+		value |= (*c & 0x7f) << bits;
+		if (*c < 0x80)
+			break;
+
+		bits += 7;
+		c++;
+	}
+
+	if (unlikely(bits >= 32)) {
+		/* broken input */
+		*p = end;
+		*num_r = 0;
+		return -1;
+	}
+
+	*p = c + 1;
+	*num_r = value;
+	return 0;
+}
+
+static size_t dump_hdr(const struct mail_index_strmap_header *hdr)
+{
+	printf("version = %u\n", hdr->version);
+	printf("uid validity = %u\n", hdr->uid_validity);
+	return sizeof(*hdr);
+}
+
+static int dump_record(const uint8_t **p, const uint8_t *end, uint32_t *uid)
+{
+	uint32_t uid_diff, n, i, count, crc32, idx;
+	size_t size;
+
+	/* <uid diff> <n> <crc32>*count <str_idx>*count */
+	if (mail_index_unpack_num(p, end, &uid_diff) <  0)
+		return -1;
+	*uid += uid_diff;
+
+	if (mail_index_unpack_num(p, end, &n) <  0)
+		return -1;
+	printf(" - uid %u: n=%u\n", *uid, n);
+
+	count = n < 2 ? n + 1 : n;
+	size = sizeof(crc32)*count + sizeof(idx)*count;
+	if (*p + size > end)
+		return -1;
+	for (i = 0; i < count; i++) {
+		if (i == 0)
+			printf("   - message-id: ");
+		else if (i == 1) {
+			if (n == 1)
+				printf("   - in-reply-to: ");
+			else
+				printf("   - references[1]: ");
+		} else {
+			printf("   - references[%u]: ", i);
+		}
+		memcpy(&crc32, *p + sizeof(crc32)*i, sizeof(crc32));
+		memcpy(&idx, *p + sizeof(crc32)*count + sizeof(idx)*i, sizeof(idx));
+		printf("crc32=%08x index=%u\n", crc32, idx);
+		if (idx > max_likely_index)
+			printf(" - index probably broken\n");
+	}
+	*p += size;
+	return 0;
+}
+
+static int dump_block(const uint8_t *data, const uint8_t *end, uint32_t *uid)
+{
+	const uint8_t *p;
+	uint32_t block_size;
+
+	if (data + 4 >= end)
+		return -1;
+
+	memcpy(&block_size, data, sizeof(block_size));
+	block_size = mail_index_offset_to_uint32(block_size) >> 2;
+	printf(" - block_size=%u\n", block_size);
+	if (block_size == 0) {
+		/* finished */
+		return -1;
+	}
+	if (data + sizeof(block_size) + block_size > end) {
+		printf("   - broken!\n");
+		return -1;
+	}
+	p = data + sizeof(block_size);
+	end = p + block_size;
+
+	*uid += 1;
+	while (p != end) {
+		if (dump_record(&p, end, uid) < 0) {
+			printf(" - broken\n");
+			return -1;
+		}
+	}
+	return p - data;
+}
+
+int main(int argc, const char *argv[])
+{
+	unsigned int pos;
+	const void *map, *end;
+	struct stat st;
+	uint32_t uid;
+	int fd, ret;
+
+	lib_init();
+
+	if (argc < 2)
+		i_fatal("Usage: threadview dovecot.index.thread");
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		i_error("open(%s) failed: %m", argv[1]);
+		return 1;
+	}
+
+	if (fstat(fd, &st) < 0) {
+		i_error("fstat(%s) failed: %m", argv[1]);
+		return 1;
+	}
+	max_likely_index = (st.st_size / 8) * 2;
+
+	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	end = CONST_PTR_OFFSET(map, st.st_size);
+	pos = dump_hdr(map);
+	uid = 0;
+	do {
+		printf("block at offset %u:\n", pos);
+		T_BEGIN {
+			ret = dump_block(CONST_PTR_OFFSET(map, pos), end, &uid);
+			pos += ret;
+		} T_END;
+	} while (ret > 0);
+	return 0;
+}