changeset 4849:7f2fee02b07c HEAD

Resize the hash when needed. Also other fixes and cleanups.
author Timo Sirainen <tss@iki.fi>
date Sun, 26 Nov 2006 17:20:11 +0200
parents 967de900c73a
children 0c540b6a0551
files src/imap/imap-thread.c src/lib-index/mail-hash.c src/lib-index/mail-hash.h
diffstat 3 files changed, 245 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/src/imap/imap-thread.c	Sun Nov 26 00:17:39 2006 +0200
+++ b/src/imap/imap-thread.c	Sun Nov 26 17:20:11 2006 +0200
@@ -202,6 +202,12 @@
 	pool_unref(ctx->msgid_pool);
 }
 
+static uint32_t crc32_str_nonzero(const char *str)
+{
+	uint32_t value = crc32_str(str);
+	return value == 0 ? 1 : value;
+}
+
 static int
 mail_thread_rec_idx(struct thread_context *ctx, uint32_t idx,
 		    const struct mail_thread_rec **rec_r)
@@ -217,7 +223,7 @@
 	return 0;
 }
 
-static unsigned int mail_thread_rec_hash(const void *key)
+static unsigned int mail_thread_hash_key(const void *key)
 {
 	const struct msgid_rec *key_rec = key;
 
@@ -241,7 +247,7 @@
 	msgids = mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO);
 	msgid = msgids == NULL ? NULL : message_id_get_next(&msgids);
 	if (msgid != NULL) {
-		if (crc32_str(msgid) == msgid_crc32)
+		if (crc32_str_nonzero(msgid) == msgid_crc32)
 			found_msgid = msgid;
 	}
 
@@ -252,7 +258,7 @@
 	}
 
 	while ((msgid = message_id_get_next(&msgids)) != NULL) {
-		if (crc32_str(msgid) == msgid_crc32) {
+		if (crc32_str_nonzero(msgid) == msgid_crc32) {
 			if (found_msgid != NULL) {
 				/* hash collisions, we can't figure this out */
 				return -1;
@@ -284,6 +290,12 @@
 		/* each non-dummy child must have a valid In-Reply-To or
 		   References header pointing to the parent, otherwise it
 		   wouldn't be our child */
+		if (parent_rec->msgid_crc32 == 0) {
+			mail_hash_set_corrupted(ctx->msgid_hash,
+				"msgid_crc32=0 node has children");
+			return NULL;
+		}
+
 		if (mail_thread_find_child_msgid(ctx, child_rec->rec.uid,
 						 parent_rec->msgid_crc32,
 						 &msgid) == 0)
@@ -324,8 +336,8 @@
 	return *p;
 }
 
-static bool mail_thread_rec_msgid_cmp(const void *key, const void *data,
-				      void *context)
+static bool mail_thread_hash_cmp(const void *key, const void *data,
+				 void *context)
 {
 	struct imap_thread_mailbox *tbox = context;
 	struct thread_context *ctx = tbox->ctx;
@@ -350,6 +362,13 @@
 	return strcmp(msgid, key_rec->msgid) == 0;
 }
 
+static unsigned int mail_thread_hash_rec(const void *p)
+{
+	const struct mail_thread_rec *rec = p;
+
+	return rec->msgid_crc32;
+}
+
 static int
 imap_thread_context_init(struct imap_thread_mailbox *tbox,
 			 struct client *client, const char *charset,
@@ -364,6 +383,15 @@
 			       STATUS_MESSAGES | STATUS_UIDNEXT, &status) < 0)
 		return -1;
 
+	if (search_args->type != SEARCH_ALL ||
+	    search_args->next != NULL) {
+		/* each search condition requires their own separate thread
+		   index. for now we don't bother to support anything else than
+		   "search all" threading, since that's what pretty much all
+		   the clients do anyway. */
+		ctx->msgid_hash = NULL;
+	}
+
 	if (ctx->msgid_hash != NULL) {
 		if (mail_hash_lock(ctx->msgid_hash) <= 0)
 			ctx->msgid_hash = NULL;
@@ -380,36 +408,34 @@
 			ctx->msgid_hash = NULL;
 		} else {
 			/* rebuild */
-			if (mail_hash_reset(ctx->msgid_hash) < 0) {
-				mail_hash_unlock(ctx->msgid_hash);
+			if (mail_hash_reset(ctx->msgid_hash, 0) < 0)
 				ctx->msgid_hash = NULL;
-			}
+		}
+	} else if (hdr->last_uid != 0) {
+		/* non-empty hash. add only the new messages in there. */
+		if (mailbox_get_uids(client->mailbox, 1, hdr->last_uid,
+				     &ctx->seqset.seq1,
+				     &ctx->seqset.seq2) < 0) {
+			mail_hash_unlock(ctx->msgid_hash);
+			return -1;
 		}
-	} else if (search_args->next == NULL &&
-		   search_args->type == SEARCH_ALL) {
-		/* threading everything. this is the only case we're trying
-		   to optimize. */
-		if (hdr->last_uid != 0) {
-			/* messages already exist in the hash. add only the
-			   new messages in there. */
-			if (mailbox_get_uids(client->mailbox, 1, hdr->last_uid,
-					     &ctx->seqset.seq1,
-					     &ctx->seqset.seq2) < 0)
-				return -1;
 
-			if (ctx->seqset.seq2 != hdr->message_count) {
-				/* some messages have been expunged.
-				   have to rebuild. */
-				if (mail_hash_reset(ctx->msgid_hash) < 0) {
-					mail_hash_unlock(ctx->msgid_hash);
-					ctx->msgid_hash = NULL;
-				}
-			} else {
-				ctx->tmp_search_arg.type = SEARCH_SEQSET;
-				ctx->tmp_search_arg.value.seqset = &ctx->seqset;
-				ctx->tmp_search_arg.not = TRUE;
-				search_args = &ctx->tmp_search_arg;
-			}
+		if (ctx->seqset.seq2 != hdr->message_count) {
+			/* some messages have been expunged. have to rebuild. */
+			if (mail_hash_reset(ctx->msgid_hash, 0) < 0)
+				ctx->msgid_hash = NULL;
+		} else {
+			/* after all these checks, this is the only case we
+			   can actually optimize. */
+			ctx->tmp_search_arg.type = SEARCH_SEQSET;
+			ctx->tmp_search_arg.value.seqset = &ctx->seqset;
+			ctx->tmp_search_arg.not = TRUE;
+			search_args = &ctx->tmp_search_arg;
+
+			if (mail_hash_resize_if_needed(ctx->msgid_hash,
+						       status.messages -
+						       hdr->message_count) < 0)
+				ctx->msgid_hash = NULL;
 		}
 	}
 
@@ -422,9 +448,10 @@
 			mail_hash_open(ibox->index, ".thread",
 				       MAIL_HASH_OPEN_FLAG_CREATE |
 				       MAIL_HASH_OPEN_FLAG_IN_MEMORY,
-				       sizeof(struct mail_thread_rec),
-				       mail_thread_rec_hash,
-				       mail_thread_rec_msgid_cmp,
+				       sizeof(struct mail_thread_rec), 0,
+				       mail_thread_hash_key,
+				       mail_thread_hash_rec,
+				       mail_thread_hash_cmp,
 				       tbox);
 	}
 
@@ -436,6 +463,8 @@
 	ctx->box = client->mailbox;
 	ctx->output = client->output;
 
+	/* at this point the hash is either locked or we're using in-memory
+	   hash where it doesn't matter */
 	hdr = mail_hash_get_header(ctx->msgid_hash);
 	count = client->messages_count < hdr->record_count ? 0 :
 		client->messages_count - hdr->record_count;
@@ -571,7 +600,7 @@
 			mail_thread_rec_get_msgid(ctx, rec, idx);
 		if (key.msgid == NULL)
 			return -1;
-		key.msgid_crc32 = crc32_str(key.msgid);
+		key.msgid_crc32 = crc32_str_nonzero(key.msgid);
 
 		if (mail_hash_remove_idx(ctx->msgid_hash, idx, &key) < 0) {
 			ctx->failed = TRUE;
@@ -782,7 +811,7 @@
 
 	memset(&rec, 0, sizeof(rec));
 	rec.rec.uid = uid;
-	rec.msgid_crc32 = msgid == NULL ? 0 : crc32_str(msgid);
+	rec.msgid_crc32 = msgid == NULL ? 0 : crc32_str_nonzero(msgid);
 	rec.refcount = 1;
 	rec.sent_date = sent_date;
 
@@ -842,6 +871,7 @@
 		uint32_t orig_idx = idx;
 
 		orig_rec = *recp;
+		rec.msgid_crc32 = 0;
 		if (mail_hash_insert(ctx->msgid_hash, NULL, &rec, &idx) < 0) {
 			ctx->failed = TRUE;
 			return -1;
@@ -975,7 +1005,7 @@
 	int ret;
 
 	key.msgid = msgid;
-	key.msgid_crc32 = crc32_str(msgid);
+	key.msgid_crc32 = crc32_str_nonzero(msgid);
 
 	ret = mail_hash_lookup(ctx->msgid_hash, &key, &value, idx_r);
 	if (ret < 0 || ctx->failed) {
@@ -1816,7 +1846,7 @@
 	key_r->msgid = message_id_get_next(&message_id);
 	if (key_r->msgid == NULL)
 		return 0;
-	key_r->msgid_crc32 = crc32_str(key_r->msgid);
+	key_r->msgid_crc32 = crc32_str_nonzero(key_r->msgid);
 
 	if (mail_hash_lookup(ctx->msgid_hash, key_r, &value, idx_r) <= 0)
 		return -1;
@@ -1851,7 +1881,7 @@
 	references = t_strdup(references);
 	do {
 		key.msgid = msgid;
-		key.msgid_crc32 = crc32_str(msgid);
+		key.msgid_crc32 = crc32_str_nonzero(msgid);
 
 		ctx->cmp_match_count = 0;
 		ctx->cmp_last_idx = 0;
@@ -2005,9 +2035,11 @@
 	tbox->msgid_hash =
 		mail_hash_open(ibox->index, ".thread", create ?
 			       MAIL_HASH_OPEN_FLAG_CREATE : 0,
-			       sizeof(struct mail_thread_rec),
-			       mail_thread_rec_hash,
-			       mail_thread_rec_msgid_cmp, tbox);
+			       sizeof(struct mail_thread_rec), 0,
+			       mail_thread_hash_key,
+			       mail_thread_hash_rec,
+			       mail_thread_hash_cmp,
+			       tbox);
 	if (tbox->msgid_hash == NULL)
 		return;
 
--- a/src/lib-index/mail-hash.c	Sun Nov 26 00:17:39 2006 +0200
+++ b/src/lib-index/mail-hash.c	Sun Nov 26 17:20:11 2006 +0200
@@ -1,8 +1,5 @@
 /* Copyright (C) 2006 Timo Sirainen */
 
-/* FIXME: we need to be able to rebuild the hash table when it gets
-   too small or large */
-
 #include "lib.h"
 #include "ioloop.h"
 #include "array.h"
@@ -16,6 +13,7 @@
 #include "mail-index-private.h"
 #include "mail-hash.h"
 
+#include <stdio.h>
 #include <stddef.h>
 #include <utime.h>
 #include <sys/stat.h>
@@ -24,17 +22,21 @@
 #define FILE_SIZE_INIT_PERCENTAGE 120
 /* How much larger to grow the file when it needs to be done */
 #define MAIL_HASH_GROW_PERCENTAGE 20
+/* Minimum hash size to use */
+#define MAIL_HASH_MIN_SIZE 109
 
 #define MAIL_HASH_TIMEOUT_SECS 3
 
 struct mail_hash {
 	struct mail_index *index;
 
-	hash_callback_t *hash_cb;
+	hash_callback_t *key_hash_cb;
 	hash_ctx_cmp_callback_t *key_compare_cb;
+	hash_callback_t *rec_hash_cb;
 	void *cb_context;
 
 	char *filepath;
+	char *suffix;
 	int fd;
 
 	dev_t dev;
@@ -99,7 +101,7 @@
 	size_t offset = offsetof(struct mail_hash_header, corrupted);
 	struct stat st;
 
-	if (MAIL_HASH_IS_IN_MEMORY(hash))
+	if (hash->fd == -1)
 		return 0;
 
 	hash->hdr->corrupted = set ? 1 : 0;
@@ -212,6 +214,8 @@
 	hash->hdr = NULL;
 	hash->hash_base = NULL;
 	hash->records_base = NULL;
+
+	hash->locked = FALSE;
 }
 
 static int mail_hash_file_map_finish(struct mail_hash *hash)
@@ -410,7 +414,7 @@
 	}
 }
 
-static int mail_hash_file_open(struct mail_hash *hash)
+static int mail_hash_file_open(struct mail_hash *hash, bool lock)
 {
 	int ret;
 
@@ -422,21 +426,27 @@
 		return -1;
 	}
 
-	if (mail_hash_file_lock(hash, F_RDLCK) <= 0)
-		return -1;
+	if (!lock) {
+		if (mail_hash_file_lock(hash, F_RDLCK) <= 0)
+			return -1;
 
-	ret = mail_hash_file_map(hash, FALSE);
-	if (hash->fd != -1)
-		(void)mail_hash_file_lock(hash, F_UNLCK);
+		ret = mail_hash_file_map(hash, FALSE);
+		if (hash->fd != -1)
+			(void)mail_hash_file_lock(hash, F_UNLCK);
+	} else {
+		if (mail_hash_file_lock(hash, F_WRLCK) <= 0)
+			return -1;
+
+		hash->locked = TRUE;
+		ret = mail_hash_file_map(hash, TRUE);
+	}
 	return ret;
 }
 
 static void
-mail_hash_header_init(struct mail_hash *hash, struct mail_hash_header *hdr,
-		      uoff_t *file_size_r)
+mail_hash_header_init(struct mail_hash *hash, unsigned int initial_count,
+		      struct mail_hash_header *hdr, uoff_t *file_size_r)
 {
-	unsigned int message_count;
-
 	memset(hdr, 0, sizeof(*hdr));
 	hdr->version = MAIL_HASH_VERSION;
 	hdr->base_header_size = sizeof(*hdr);
@@ -446,16 +456,19 @@
 	   uid_validity may be 0 */
 	hdr->uid_validity = hash->index->hdr->uid_validity;
 
-	message_count = I_MAX(hash->index->hdr->messages_count, 25);
-	hdr->hash_size = primes_closest(I_MAX(message_count * 2, 109));
+	if (initial_count == 0)
+		initial_count = I_MAX(hash->index->hdr->messages_count, 25);
+	hdr->hash_size = I_MAX(primes_closest(initial_count * 2),
+			       MAIL_HASH_MIN_SIZE);
 
 	*file_size_r = hdr->header_size +
 		hdr->hash_size * sizeof(uint32_t) +
-		hash->record_size * (message_count *
+		hash->record_size * (initial_count *
 				     FILE_SIZE_INIT_PERCENTAGE / 100);
 }
 
-static int mail_hash_file_create(struct mail_hash *hash)
+static int
+mail_hash_file_create(struct mail_hash *hash, unsigned int initial_count)
 {
 	struct dotlock *dotlock;
 	struct mail_hash_header hdr;
@@ -468,7 +481,7 @@
 		return -1;
 	}
 
-	mail_hash_header_init(hash, &hdr, &file_size);
+	mail_hash_header_init(hash, initial_count, &hdr, &file_size);
 	if (write_full(fd, &hdr, sizeof(hdr)) < 0 ||
 	    file_set_size(fd, file_size) < 0) {
 		mail_hash_set_syscall_error(hash, "write()");
@@ -483,12 +496,13 @@
 	return 0;
 }
 
-static void mail_hash_create_in_memory(struct mail_hash *hash)
+static void mail_hash_create_in_memory(struct mail_hash *hash,
+				       unsigned int initial_count)
 {
 	struct mail_hash_header hdr;
 	uoff_t file_size;
 
-	mail_hash_header_init(hash, &hdr, &file_size);
+	mail_hash_header_init(hash, initial_count, &hdr, &file_size);
 
 	hash->mmap_size = file_size;
 	hash->mmap_base = mmap_anon(hash->mmap_size);
@@ -506,8 +520,10 @@
 
 struct mail_hash *
 mail_hash_open(struct mail_index *index, const char *suffix,
-	       enum mail_hash_open_flags flags,
-	       unsigned int record_size, hash_callback_t *hash_cb,
+	       enum mail_hash_open_flags flags, unsigned int record_size,
+	       unsigned int initial_count,
+	       hash_callback_t *key_hash_cb,
+	       hash_callback_t *rec_hash_cb,
 	       hash_ctx_cmp_callback_t *key_compare_cb,
 	       void *context)
 {
@@ -521,16 +537,18 @@
 	hash->filepath = (flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ?
 		i_strdup("(in-memory hash)") :
 		i_strconcat(index->filepath, suffix, NULL);
+	hash->suffix = i_strdup(suffix);
 	hash->record_size = record_size;
 	hash->fd = -1;
 
-	hash->hash_cb = hash_cb;
+	hash->key_hash_cb = key_hash_cb;
+	hash->rec_hash_cb = rec_hash_cb;
 	hash->key_compare_cb = key_compare_cb;
 	hash->cb_context = context;
 
 	ret = MAIL_INDEX_IS_IN_MEMORY(hash->index) ||
 		(flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ? -1 :
-		mail_hash_file_open(hash);
+		mail_hash_file_open(hash, FALSE);
 
 	if (ret <= 0 && (flags & MAIL_HASH_OPEN_FLAG_CREATE) == 0) {
 		/* we don't want to create the hash */
@@ -539,12 +557,12 @@
 	}
 	if (ret == 0) {
 		/* not found or broken, recreate it */
-		ret = mail_hash_reset(hash);
+		ret = mail_hash_reset(hash, initial_count);
 	}
 	if (ret < 0) {
 		/* fallback to in-memory hash */
 		mail_hash_file_close(hash);
-		mail_hash_create_in_memory(hash);
+		mail_hash_create_in_memory(hash, initial_count);
 	}
 	return hash;
 }
@@ -557,28 +575,26 @@
 
 	mail_hash_file_close(hash);
 	i_free(hash->filepath);
+	i_free(hash->suffix);
 	i_free(hash);
 }
 
-int mail_hash_reset(struct mail_hash *hash)
+int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count)
 {
+	bool locked = hash->locked;
 	int ret;
 
 	mail_hash_file_close(hash);
 
-	ret = mail_hash_file_create(hash);
+	ret = mail_hash_file_create(hash, initial_count);
 	if (ret == 0) {
 		/* should work now, try opening again */
-		ret = mail_hash_file_open(hash);
+		ret = mail_hash_file_open(hash, locked);
 		if (ret == 0) {
 			mail_hash_set_corrupted(hash,
 				"Newly created hash file broken");
 			return -1;
 		}
-		if (ret > 0 && hash->locked) {
-			if (mail_hash_file_lock(hash, F_WRLCK) <= 0)
-				return -1;
-		}
 	}
 	return ret < 0 ? -1 : 0;
 }
@@ -589,13 +605,13 @@
 
 	mail_hash_file_close(hash);
 
-	if ((ret = mail_hash_file_open(hash)) < 0)
+	if ((ret = mail_hash_file_open(hash, FALSE)) < 0)
 		return -1;
 	if (ret > 0)
 		return 0;
 
 	/* not found or broken, recreate it */
-	return mail_hash_reset(hash);
+	return mail_hash_reset(hash, 0);
 }
 
 static int mail_hash_reopen_if_needed(struct mail_hash *hash)
@@ -671,7 +687,7 @@
 	unsigned int hash_idx;
 	uint32_t idx;
 
-	hash_idx = hash->hash_cb(key) % hash->hdr->hash_size;
+	hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size;
 	for (idx = hash->hash_base[hash_idx]; idx != 0; ) {
 		if (idx > hash->hdr->record_count) {
 			mail_hash_set_corrupted(hash,
@@ -773,8 +789,8 @@
 	return 0;
 }
 
-int mail_hash_insert(struct mail_hash *hash, const void *key,
-		     const void *value, uint32_t *idx_r)
+static int mail_hash_insert_with_hash(struct mail_hash *hash, const void *value,
+				      uint32_t hash_key, uint32_t *idx_r)
 {
 	struct mail_hash_record *rec;
 	uint32_t idx, *idx_p;
@@ -807,13 +823,11 @@
 	}
 
 	memcpy(rec, value, hash->record_size);
-
 	if (mail_hash_update_header(hash, rec, FALSE) < 0)
 		return -1;
 
-	if (key != NULL) {
-		idx_p = &hash->hash_base[hash->hash_cb(key) %
-					 hash->hdr->hash_size];
+	if (hash_key != 0) {
+		idx_p = &hash->hash_base[hash_key % hash->hdr->hash_size];
 		while (*idx_p != 0) {
 			rec = HASH_RECORD_IDX(hash, *idx_p);
 			idx_p = &rec->next_idx;
@@ -822,11 +836,26 @@
 		if (mail_hash_mark_update(hash, idx_p, sizeof(*idx_p)) < 0)
 			return -1;
 		*idx_p = idx;
+
+		hash->hdr->hashed_count++;
 	}
+
 	*idx_r = idx;
 	return 0;
 }
 
+int mail_hash_insert(struct mail_hash *hash, const void *key,
+		     const void *value, uint32_t *idx_r)
+{
+	uint32_t hash_key = key == NULL ? 0 : hash->key_hash_cb(key);
+
+	i_assert((key == NULL && hash->rec_hash_cb(value) == 0) ||
+		 (key != NULL && hash_key != 0 &&
+		  hash->rec_hash_cb(value) != 0));
+
+	return mail_hash_insert_with_hash(hash, value, hash_key, idx_r);
+}
+
 int mail_hash_remove(struct mail_hash *hash, const void *key)
 {
 	return mail_hash_remove_idx(hash, 0, key);
@@ -836,12 +865,12 @@
 {
 	struct mail_hash_record *rec = NULL;
 	unsigned int hash_idx;
-	uint32_t *idx_p;
+	uint32_t hash_key, *idx_p;
 
 	i_assert(idx != 0 || key != NULL);
 
 	if (key != NULL) {
-		hash_idx = hash->hash_cb(key) % hash->hdr->hash_size;
+		hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size;
 		for (idx_p = &hash->hash_base[hash_idx]; *idx_p != 0; ) {
 			if (*idx_p > hash->hdr->record_count) {
 				mail_hash_set_corrupted(hash,
@@ -881,6 +910,15 @@
 		hash->hdr->message_count--;
 	}
 
+	hash_key = hash->rec_hash_cb(rec);
+	if (hash_key != 0) {
+		if (hash->hdr->hashed_count == 0) {
+			mail_hash_set_corrupted(hash, "Too low hashed_count");
+			return -1;
+		}
+		hash->hdr->hashed_count--;
+	}
+
 	if (idx == hash->hdr->record_count) {
 		hash->hdr->record_count--;
 	} else {
@@ -946,3 +984,77 @@
 
 	return mail_hash_update_header(hash, rec, had_uid);
 }
+
+int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count)
+{
+	struct mail_hash *tmp_hash;
+	const struct mail_hash_record *rec;
+	const char *tmp_filename;
+	uint32_t hash_key, idx, new_idx;
+	float nodes_per_list;
+	int ret = 0;
+
+	if (MAIL_HASH_IS_IN_MEMORY(hash))
+		return 0;
+
+	i_assert(hash->locked);
+
+	nodes_per_list = (float)(hash->hdr->hashed_count + grow_count) /
+		(float)hash->hdr->hash_size;
+	if ((nodes_per_list > 0.3 && nodes_per_list < 2.0) ||
+	    hash->hdr->hash_size <= MAIL_HASH_MIN_SIZE)
+		return 0;
+
+	/* create a temporary hash */
+	tmp_hash = mail_hash_open(hash->index,
+				  t_strconcat(hash->suffix, ".tmp", NULL),
+				  MAIL_HASH_OPEN_FLAG_CREATE,
+				  hash->record_size,
+				  hash->hdr->hashed_count + grow_count,
+				  hash->key_hash_cb,
+				  hash->rec_hash_cb,
+				  hash->key_compare_cb,
+				  hash->cb_context);
+	if (tmp_hash == NULL)
+		return -1;
+
+	/* populate */
+	for (idx = 1; idx <= hash->hdr->record_count; idx++) {
+		rec = HASH_RECORD_IDX(hash, idx);
+		hash_key = hash->rec_hash_cb(rec);
+
+		if (mail_hash_insert_with_hash(tmp_hash, rec, hash_key,
+					       &new_idx) < 0) {
+			ret = -1;
+			break;
+		}
+		i_assert(idx == new_idx);
+	}
+	(void)mail_hash_file_write_changes(tmp_hash);
+
+	tmp_filename = t_strdup(tmp_hash->filepath);
+	mail_hash_free(&tmp_hash);
+	if (ret < 0) {
+		(void)unlink(tmp_filename);
+		return -1;
+	}
+
+	/* replace the old */
+	if (rename(tmp_filename, hash->filepath) < 0) {
+		mail_hash_set_syscall_error(hash, "rename()");
+		(void)unlink(tmp_filename);
+		return -1;
+	}
+
+	/* reopen the hash */
+	mail_hash_file_close(hash);
+	if ((ret = mail_hash_file_open(hash, TRUE)) < 0)
+		return -1;
+	if (ret == 0) {
+		mail_hash_set_corrupted(hash,
+			"Newly created hash file broken");
+		return -1;
+	}
+
+	return 0;
+}
--- a/src/lib-index/mail-hash.h	Sun Nov 26 00:17:39 2006 +0200
+++ b/src/lib-index/mail-hash.h	Sun Nov 26 17:20:11 2006 +0200
@@ -30,6 +30,8 @@
 	uint32_t record_count;
 	/* Number of message records (records with non-zero UID) */
 	uint32_t message_count;
+	/* Number of messages with non-zero hash */
+	uint32_t hashed_count;
 
 	/* UID validity. */
 	uint32_t uid_validity;
@@ -57,21 +59,24 @@
 	MAIL_HASH_OPEN_FLAG_IN_MEMORY	= 0x02
 };
 
-/* Returns hash code. */
-typedef unsigned int hash_ctx_callback_t(const void *p, void *context);
 /* Returns 0 if the pointers are equal. */
 typedef bool hash_ctx_cmp_callback_t(const void *key, const void *data,
 				     void *context);
 
 struct mail_hash *
 mail_hash_open(struct mail_index *index, const char *suffix,
-	       enum mail_hash_open_flags flags,
-	       unsigned int record_size, hash_callback_t *hash_cb,
+	       enum mail_hash_open_flags flags, unsigned int record_size,
+	       unsigned int initial_count,
+	       hash_callback_t *key_hash_cb,
+	       hash_callback_t *rec_hash_cb,
 	       hash_ctx_cmp_callback_t *key_compare_cb,
 	       void *context);
 void mail_hash_free(struct mail_hash **hash);
 
-int mail_hash_reset(struct mail_hash *hash);
+/* If reset or resize fails, the hash file is closed and the hash is in
+   unusable state until mail_hash_lock() succeeds. */
+int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count);
+int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count);
 
 /* Lock hash file. Returns 1 if we locked the file, 0 if timeouted or hash
    is in memory, -1 if error. */
@@ -83,7 +88,8 @@
 int mail_hash_lookup(struct mail_hash *hash, const void *key,
 		     const void **value_r, uint32_t *idx_r);
 /* Remember that inserting may cause existing returned values to be
-   invalidated */
+   invalidated. If key=NULL, it's not inserted into hash table. Note that
+   hash=0 equals to key=NULL insert, so a valid hash value must never be 0. */
 int mail_hash_insert(struct mail_hash *hash, const void *key,
 		     const void *value, uint32_t *idx_r);
 int mail_hash_remove(struct mail_hash *hash, const void *key);