changeset 4855:5bc593f1a8f6 HEAD

Added "squat" full text search indexer backend. Its name and basic ideas originate to Cyrus's squat index, but Dovecot's implementation is completely different and it supports incremental updates. Still a bit broken and lacks locking, but I wanted to get this into CVS now.
author Timo Sirainen <tss@iki.fi>
date Fri, 01 Dec 2006 23:02:10 +0200
parents 09b9b1e8f73d
children f75041ec22ba
files configure.in src/plugins/Makefile.am src/plugins/fts-squat/.cvsignore src/plugins/fts-squat/Makefile.am src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts-squat/fts-squat-plugin.c src/plugins/fts-squat/fts-squat-plugin.h src/plugins/fts-squat/squat-test.c src/plugins/fts-squat/squat-trie.c src/plugins/fts-squat/squat-trie.h src/plugins/fts-squat/squat-uidlist.c src/plugins/fts-squat/squat-uidlist.h
diffstat 12 files changed, 2687 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/configure.in	Fri Dec 01 22:55:55 2006 +0200
+++ b/configure.in	Fri Dec 01 23:02:10 2006 +0200
@@ -1873,6 +1873,7 @@
 src/plugins/expire/Makefile
 src/plugins/fts/Makefile
 src/plugins/fts-lucene/Makefile
+src/plugins/fts-squat/Makefile
 src/plugins/quota/Makefile
 src/plugins/imap-quota/Makefile
 src/plugins/trash/Makefile
--- a/src/plugins/Makefile.am	Fri Dec 01 22:55:55 2006 +0200
+++ b/src/plugins/Makefile.am	Fri Dec 01 23:02:10 2006 +0200
@@ -6,4 +6,4 @@
 FTS_LUCENE = fts-lucene
 endif
 
-SUBDIRS = acl convert expire fts quota imap-quota trash $(ZLIB) $(FTS_LUCENE)
+SUBDIRS = acl convert expire fts fts-squat quota imap-quota trash $(ZLIB) $(FTS_LUCENE)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/.cvsignore	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,9 @@
+*.la
+*.lo
+*.o
+.deps
+.libs
+Makefile
+Makefile.in
+so_locations
+squat-test
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/Makefile.am	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,55 @@
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/lib \
+	-I$(top_srcdir)/src/lib-mail \
+	-I$(top_srcdir)/src/lib-storage \
+	-I$(top_srcdir)/src/plugins/fts
+
+lib02_fts_squat_plugin_la_LDFLAGS = -module -avoid-version
+
+module_LTLIBRARIES = \
+	lib02_fts_squat_plugin.la
+
+lib02_fts_squat_plugin_la_SOURCES = \
+	fts-squat-plugin.c \
+	fts-backend-squat.c \
+	squat-trie.c \
+	squat-uidlist.c
+
+noinst_HEADERS = \
+	fts-squat-plugin.h \
+	squat-trie.h \
+	squat-uidlist.h
+
+noinst_PROGRAMS = squat-test
+
+squat_test_SOURCES = \
+	squat-test.c
+
+common_objects = \
+	squat-trie.lo \
+	squat-uidlist.lo
+
+libs = \
+	$(top_builddir)/src/lib-storage/register/libstorage-register.a \
+	$(STORAGE_LIBS) \
+	$(top_builddir)/src/lib-storage/libstorage.a \
+	$(top_builddir)/src/lib-storage/list/libstorage_list.a \
+	$(top_builddir)/src/lib-imap/libimap.a \
+	$(top_builddir)/src/lib-mail/libmail.a \
+	$(top_builddir)/src/lib-charset/libcharset.a \
+	$(top_builddir)/src/lib/liblib.a
+
+squat_test_LDADD = \
+	$(common_objects) \
+	$(libs) \
+	$(LIBICONV) \
+	$(RAND_LIBS)
+
+squat_test_DEPENDENCIES = $(libs) $(common_objects)
+
+install-exec-local:
+	for d in imap lda; do \
+	  $(mkdir_p) $(DESTDIR)$(moduledir)/$$d; \
+	  rm -f $(DESTDIR)$(moduledir)/$$d/lib02_fts_squat_plugin.so; \
+	  $(LN_S) ../lib02_fts_squat_plugin.so $(DESTDIR)$(moduledir)/$$d; \
+	done
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/fts-backend-squat.c	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,191 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+#include "lib.h"
+#include "array.h"
+#include "mail-storage.h"
+#include "mail-search.h"
+#include "squat-trie.h"
+#include "fts-squat-plugin.h"
+
+#define SQUAT_FILE_PREFIX "dovecot.index.search"
+
+struct squat_fts_backend {
+	struct fts_backend backend;
+	struct squat_trie *trie;
+
+	uint32_t last_uid;
+};
+
+static struct fts_backend *fts_backend_squat_init(struct mailbox *box)
+{
+	struct squat_fts_backend *backend;
+	struct mail_storage *storage;
+	const char *path;
+
+	storage = mailbox_get_storage(box);
+	path = mail_storage_get_mailbox_index_dir(storage,
+						  mailbox_get_name(box));
+	if (*path == '\0') {
+		/* in-memory indexes */
+		return NULL;
+	}
+
+	backend = i_new(struct squat_fts_backend, 1);
+	backend->backend = fts_backend_squat;
+	backend->trie =
+		squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL));
+	return &backend->backend;
+}
+
+static void fts_backend_squat_deinit(struct fts_backend *_backend)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	squat_trie_close(backend->trie);
+	i_free(backend);
+}
+
+static int fts_backend_squat_get_last_uid(struct fts_backend *_backend,
+					  uint32_t *last_uid_r)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	*last_uid_r = backend->last_uid;
+	return 0;
+}
+
+static struct fts_backend_build_context *
+fts_backend_squat_build_init(struct fts_backend *_backend, uint32_t *last_uid_r)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+	struct fts_backend_build_context *ctx;
+
+	*last_uid_r = backend->last_uid;
+
+	ctx = i_new(struct fts_backend_build_context, 1);
+	ctx->backend = _backend;
+	return ctx;
+}
+
+static int
+fts_backend_squat_build_more(struct fts_backend_build_context *ctx,
+			     uint32_t uid, const unsigned char *data,
+			     size_t size)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)ctx->backend;
+
+	i_assert(uid >= backend->last_uid);
+	backend->last_uid = uid;
+
+	return squat_trie_add(backend->trie, uid, data, size);
+}
+
+static int
+fts_backend_squat_build_deinit(struct fts_backend_build_context *ctx)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)ctx->backend;
+
+	squat_trie_flush(backend->trie);
+	i_free(ctx);
+	return 0;
+}
+
+static void
+fts_backend_squat_expunge(struct fts_backend *_backend __attr_unused__,
+			  struct mail *mail __attr_unused__)
+{
+}
+
+static int get_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids,
+		    unsigned int *message_count_r)
+{
+	struct mail_search_arg search_arg;
+        struct mailbox_transaction_context *t;
+	struct mail_search_context *ctx;
+	struct mail *mail;
+	unsigned int count = 0;
+	int ret;
+
+	memset(&search_arg, 0, sizeof(search_arg));
+	search_arg.type = SEARCH_ALL;
+
+	t = mailbox_transaction_begin(box, 0);
+	ctx = mailbox_search_init(t, NULL, &search_arg, NULL);
+
+	mail = mail_alloc(t, 0, NULL);
+	while (mailbox_search_next(ctx, mail) > 0) {
+		seq_range_array_add(uids, 0, mail->uid);
+		count++;
+	}
+	mail_free(&mail);
+
+	ret = mailbox_search_deinit(&ctx);
+	mailbox_transaction_rollback(&t);
+
+	*message_count_r = count;
+	return ret;
+}
+
+static void
+fts_backend_squat_expunge_finish(struct fts_backend *_backend,
+				 struct mailbox *box, bool committed)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+	ARRAY_TYPE(seq_range) uids = ARRAY_INIT;
+	unsigned int count;
+
+	if (!committed)
+		return;
+
+	t_push();
+	t_array_init(&uids, 128);
+	if (get_uids(box, &uids, &count) == 0) {
+		(void)squat_trie_mark_having_expunges(backend->trie, &uids,
+						      count);
+	}
+	t_pop();
+}
+
+static int
+fts_backend_squat_lookup(struct fts_backend *_backend, const char *key,
+			 ARRAY_TYPE(seq_range) *result)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	return squat_trie_lookup(backend->trie, result, key);
+}
+
+static int
+fts_backend_squat_filter(struct fts_backend *_backend, const char *key,
+			 ARRAY_TYPE(seq_range) *result)
+{
+	struct squat_fts_backend *backend =
+		(struct squat_fts_backend *)_backend;
+
+	return squat_trie_filter(backend->trie, result, key);
+}
+
+struct fts_backend fts_backend_squat = {
+	MEMBER(name) "squat",
+	MEMBER(definite_lookups) FALSE,
+
+	{
+		fts_backend_squat_init,
+		fts_backend_squat_deinit,
+		fts_backend_squat_get_last_uid,
+		fts_backend_squat_build_init,
+		fts_backend_squat_build_more,
+		fts_backend_squat_build_deinit,
+		fts_backend_squat_expunge,
+		fts_backend_squat_expunge_finish,
+		fts_backend_squat_lookup,
+		fts_backend_squat_filter
+	}
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/fts-squat-plugin.c	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,14 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+#include "lib.h"
+#include "fts-squat-plugin.h"
+
+void fts_squat_plugin_init(void)
+{
+	fts_backend_register(&fts_backend_squat);
+}
+
+void fts_squat_plugin_deinit(void)
+{
+	fts_backend_unregister(fts_backend_squat.name);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/fts-squat-plugin.h	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,11 @@
+#ifndef __FTS_SQUAT_PLUGIN_H
+#define __FTS_SQUAT_PLUGIN_H
+
+#include "fts-api-private.h"
+
+extern struct fts_backend fts_backend_squat;
+
+void fts_squat_plugin_init(void);
+void fts_squat_plugin_deinit(void);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/squat-test.c	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,117 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+#include "lib.h"
+#include "array.h"
+#include "istream.h"
+#include "squat-trie.h"
+#include "squat-uidlist.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/time.h>
+
+static void result_print(ARRAY_TYPE(seq_range) *result)
+{
+	const struct seq_range *range;
+	unsigned int i, count;
+
+	range = array_get(result, &count);
+	for (i = 0; i < count; i++) {
+		if (i != 0)
+			printf(",");
+		printf("%u", range[i].seq1);
+		if (range[i].seq1 != range[i].seq2)
+			printf("-%u", range[i].seq2);
+	}
+	printf("\n");
+}
+
+int main(int argc __attr_unused__, char *argv[])
+{
+	struct squat_trie *trie;
+	struct istream *input;
+	ARRAY_TYPE(seq_range) result;
+	char *line, *str, buf[4096];
+	int fd;
+	ssize_t ret;
+	unsigned int last = 0, seq = 0, leaves, uid_lists_mem, uid_lists_count;
+	size_t mem;
+	clock_t clock_start, clock_end;
+	struct timeval tv_start, tv_end;
+	double cputime;
+
+	lib_init();
+	(void)unlink("/tmp/squat-test-index.search");
+	(void)unlink("/tmp/squat-test-index.search.uids");
+	trie = squat_trie_open("/tmp/squat-test-index.search");
+
+	clock_start = clock();
+	gettimeofday(&tv_start, NULL);
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	input = i_stream_create_file(fd, default_pool, 0, FALSE);
+	while ((line = i_stream_read_next_line(input)) != NULL) {
+		if (last != input->v_offset/(1024*100)) {
+			fprintf(stderr, "\r%ukB", (unsigned)(input->v_offset/1024));
+			fflush(stderr);
+			last = input->v_offset/(1024*100);
+		}
+		if (strncmp(line, "From ", 5) == 0) {
+			seq++;
+			continue;
+		}
+
+		if (squat_trie_add(trie, seq, line, strlen(line)) < 0)
+			break;
+	}
+	squat_trie_flush(trie);
+
+	clock_end = clock();
+	gettimeofday(&tv_end, NULL);
+
+	cputime = (double)(clock_end - clock_start) / CLOCKS_PER_SEC;
+	fprintf(stderr, "\n - Index time: %.2f CPU seconds, "
+		"%.2f real seconds (%.02fMB/CPUs)\n", cputime,
+		(tv_end.tv_sec - tv_start.tv_sec) +
+		(tv_end.tv_usec - tv_start.tv_usec)/1000000.0,
+		input->v_offset / cputime / (1024*1024));
+
+	mem = squat_trie_mem_used(trie, &leaves);
+	uid_lists_mem = squat_uidlist_mem_used(_squat_trie_get_uidlist(trie),
+					       &uid_lists_count);
+	fprintf(stderr, " - %u bytes in %u nodes (%.02f%%)\n"
+		" - %u bytes in %u uid_lists (%.02f%%)\n"
+		" - %u bytes total of %"PRIuUOFF_T" (%.02f%%)\n",
+		(unsigned)mem, leaves, mem / (float)input->v_offset * 100.0,
+		uid_lists_mem, uid_lists_count,
+		uid_lists_mem / (float)input->v_offset * 100.0,
+		(unsigned)(uid_lists_mem + mem), input->v_offset,
+		(uid_lists_mem + mem) / (float)input->v_offset * 100.0);
+
+	i_stream_unref(&input);
+	close(fd);
+
+	i_array_init(&result, 128);
+	while ((str = fgets(buf, sizeof(buf), stdin)) != NULL) {
+		ret = strlen(str)-1;
+		str[ret] = 0;
+
+		array_clear(&result);
+		gettimeofday(&tv_start, NULL);
+		if (!squat_trie_lookup(trie, &result, str))
+			printf("No matches\n");
+		else {
+			gettimeofday(&tv_end, NULL);
+			printf(" - Search took %.05f CPU seconds\n",
+			       (tv_end.tv_sec - tv_start.tv_sec) +
+			       (tv_end.tv_usec - tv_start.tv_usec)/1000000.0);
+			result_print(&result);
+		}
+	}
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/squat-trie.c	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,1313 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+#include "lib.h"
+#include "array.h"
+#include "bsearch-insert-pos.h"
+#include "istream.h"
+#include "ostream.h"
+#include "mmap-util.h"
+#include "squat-uidlist.h"
+#include "squat-trie.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+
+/* for non-x86 use memcpy() when accessing unaligned int* addresses */
+#if defined(__i386__) || defined(__x86_64__)
+#  define ALLOW_UNALIGNED_ACCESS
+#endif
+
+#define TRIE_COMPRESS_PERCENTAGE 30
+#define TRIE_COMPRESS_MIN_SIZE (1024*50)
+
+#define BLOCK_SIZE 4
+
+#define ALIGN(size) \
+	(((size) + sizeof(void *)-1) & ~((unsigned int) sizeof(void *)-1))
+
+struct squat_trie {
+	char *filepath;
+	int fd;
+	dev_t dev;
+	ino_t ino;
+
+	void *mmap_base;
+	size_t mmap_size;
+
+	const struct squat_trie_header *hdr;
+
+	struct squat_uidlist *uidlist;
+	struct trie_node *root;
+	buffer_t *buf;
+	unsigned int deleted_space;
+	struct ostream *output;
+	struct squat_uidlist_compress_ctx *compress_ctx;
+
+	uint32_t prev_uid;
+	unsigned int prev_added_size;
+	uint16_t prev_added[BLOCK_SIZE-1];
+
+	unsigned int node_count;
+
+	unsigned int corrupted:1;
+};
+
+struct squat_trie_header {
+	uint32_t uidvalidity;
+	uint32_t header_size;
+	uint32_t used_file_size;
+	uint32_t deleted_space;
+	uint32_t node_count;
+
+	uint32_t root_offset;
+};
+
+/*
+packed_node {
+	packed ((8bit_chars_count << 1) | have_16bit_chars);
+	uint8_t 8bit_chars[8bit_chars_count];
+	uint32_t idx[8bit_chars_count];
+	if (have_16bit_chars) {
+		packed 16bit_chars_count;
+		uint16_t 16bit_chars[16bit_chars_count];
+		uint32_t idx[16bit_chars_count];
+	}
+}
+*/
+
+struct trie_node {
+	/* new characters have been added to this node */
+	uint8_t resized:1;
+	/* idx pointers have been updated */
+	uint8_t modified:1;
+	uint8_t chars_8bit_count;
+	uint16_t chars_16bit_count;
+
+	uint32_t file_offset;
+	uint32_t orig_size;
+
+	/* the node pointers are valid as long as their lowest bit is 0,
+	   otherwise they're offsets to the trie file (>> 1).
+
+	   in leaf nodes the children pointers are uint32_t uid_list_idx[]; */
+	/* uint8_t 8bit_chars[chars_8bit_count]; */
+	/* struct trie_node *children[chars_8bit_count]; */
+	/* uint16_t 16bit_chars[chars_16bit_count]; */
+	/* struct trie_node *children[chars_16bit_count]; */
+};
+#define NODE_CHARS8(node) \
+	(uint8_t *)(node + 1)
+#define NODE_CHILDREN8(node) \
+	(struct trie_node **) \
+		((char *)((node) + 1) + \
+		 ALIGN(sizeof(uint8_t) * ((node)->chars_8bit_count)))
+#define NODE_CHARS16(node) \
+	(uint16_t *)((char *)NODE_CHILDREN8(node) + \
+		     sizeof(struct trie_node *) * ((node)->chars_8bit_count))
+#define NODE_CHILDREN16(node) \
+	(struct trie_node **) \
+		((char *)NODE_CHARS16(node) + \
+		 ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count)))
+
+static int
+squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node,
+			 unsigned int level);
+static int trie_write_node(struct squat_trie *trie, unsigned int level,
+			   struct trie_node *node);
+
+static int chr_8bit_cmp(const void *_key, const void *_chr)
+{
+	const uint8_t *key = _key, *chr = _chr;
+
+	return *key - *chr;
+}
+
+static int chr_16bit_cmp(const void *_key, const void *_chr)
+{
+	const uint16_t *key = _key, *chr = _chr;
+
+	return *key - *chr;
+}
+
+void _squat_trie_pack_num(buffer_t *buffer, uint32_t num)
+{
+	uint8_t c;
+
+	/* number continues as long as the highest bit is set */
+	while (num >= 0x80) {
+		c = (num & 0x7f) | 0x80;
+		num >>= 7;
+
+		buffer_append(buffer, &c, 1);
+	}
+
+	c = num;
+	buffer_append(buffer, &c, 1);
+}
+
+uint32_t _squat_trie_unpack_num(const uint8_t **p, const uint8_t *end)
+{
+	const uint8_t *c = *p;
+	uint32_t value = 0;
+	unsigned int bits = 0;
+
+	while (c != end && *c >= 0x80) {
+		value |= (*c & 0x7f) << bits;
+		bits += 7;
+		c++;
+	}
+
+	if (c == end) {
+		/* last number shouldn't end with high bit */
+		return 0;
+	}
+	if (bits > 32-7) {
+		/* we have only 32bit numbers */
+		return 0;
+	}
+
+	value |= (*c & 0x7f) << bits;
+	*p = c + 1;
+	return value;
+}
+
+static const void *data_normalize(const void *data, size_t size, buffer_t *dest)
+{
+	const uint8_t *src = data;
+	size_t i;
+
+	buffer_set_used_size(dest, 0);
+	for (i = 0; i < size; i++) {
+		uint16_t chr;
+
+		if (src[i] <= 32)
+			chr = 0;
+		else if (src[i] > 'z')
+			chr = src[i] - 32 - 26;
+		else
+			chr = i_toupper(src[i]) - 32;
+		buffer_append(dest, &chr, sizeof(chr));
+	}
+	return dest->data;
+}
+
+static void
+squat_trie_set_syscall_error(struct squat_trie *trie, const char *function)
+{
+	i_error("%s failed with index search file %s: %m",
+		function, trie->filepath);
+}
+
+void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason)
+{
+	i_error("Corrupted index search file %s: %s", trie->filepath, reason);
+
+	(void)unlink(trie->filepath);
+	trie->corrupted = TRUE;
+}
+
+static void
+trie_map_node_save_children(unsigned int level, const uint32_t *src_idx,
+			    unsigned int count, struct trie_node **children)
+{
+	unsigned int i, file_bit;
+
+	file_bit = level == BLOCK_SIZE ? 0 : 1;
+
+#ifndef ALLOW_UNALIGNED_ACCESS
+	if ((POINTER_CAST_TO(src_idx, size_t) & (sizeof(uint32_t)-1)) == 0) {
+#endif
+		for (i = 0; i < count; i++) {
+			children[i] = src_idx[i] == 0 ? NULL :
+				POINTER_CAST(src_idx[i] | file_bit);
+		}
+#ifndef ALLOW_UNALIGNED_ACCESS
+	} else {
+		/* unaligned access */
+		uint32_t idx;
+
+		for (i = 0; i < count; i++) {
+			memcpy(&idx, &src_idx[i], sizeof(idx));
+			children[i] = idx == 0 ? NULL :
+				POINTER_CAST(idx | file_bit);
+		}
+	}
+#endif
+}
+
+static int
+trie_map_node(struct squat_trie *trie, uint32_t offset, unsigned int level,
+	      struct trie_node **node_r)
+{
+	struct trie_node *node;
+	const uint8_t *p, *end, *chars8_src, *chars16_src;
+	uint32_t num, chars8_count, chars16_count;
+	unsigned int chars8_size, chars16_size;
+
+	i_assert(trie->fd != -1);
+
+	if (offset >= trie->mmap_size) {
+		squat_trie_set_corrupted(trie, "trie offset too large");
+		return -1;
+	}
+
+	p = CONST_PTR_OFFSET(trie->mmap_base, offset);
+	end = CONST_PTR_OFFSET(trie->mmap_base, trie->mmap_size);
+
+	/* get 8bit char count and check that it's valid */
+	num = _squat_trie_unpack_num(&p, end);
+	chars8_count = num >> 1;
+
+	if (chars8_count > 256 || p + chars8_count >= end) {
+		squat_trie_set_corrupted(trie, "trie offset broken");
+		return -1;
+	}
+
+	chars8_src = p;
+	chars8_size = ALIGN(chars8_count * sizeof(uint8_t)) +
+		chars8_count * sizeof(struct trie_node *);
+	if ((num & 1) == 0) {
+		/* no 16bit chars */
+		chars16_count = 0;
+		chars16_size = 0;
+		chars16_src = NULL;
+	} else {
+		/* get the 16bit char count and check that it's valid */
+		p = CONST_PTR_OFFSET(p, chars8_count *
+				     (sizeof(uint8_t) + sizeof(uint32_t)));
+		chars16_count = _squat_trie_unpack_num(&p, end);
+		if (chars16_count > 65536 ||
+		    p + chars16_count*sizeof(uint16_t) >= end) {
+			squat_trie_set_corrupted(trie, "trie offset broken");
+			return -1;
+		}
+
+		chars16_src = p;
+		chars16_size = ALIGN(chars16_count * sizeof(uint16_t)) +
+			chars16_count * sizeof(struct trie_node *);
+	}
+
+	node = i_malloc(sizeof(*node) + chars8_size + chars16_size);
+	node->chars_8bit_count = chars8_count;
+	node->chars_16bit_count = chars16_count;
+	node->file_offset = offset;
+
+	{
+		uint8_t *chars8 = NODE_CHARS8(node);
+		uint16_t *chars16 = NODE_CHARS16(node);
+		struct trie_node **children8 = NODE_CHILDREN8(node);
+		struct trie_node **children16 = NODE_CHILDREN16(node);
+		const uint32_t *src_idx;
+		const void *end_offset;
+
+		memcpy(chars8, chars8_src, sizeof(uint8_t) * chars8_count);
+		memcpy(chars16, chars16_src, sizeof(uint16_t) * chars16_count);
+
+		src_idx = CONST_PTR_OFFSET(chars8_src, chars8_count);
+		trie_map_node_save_children(level, src_idx, chars8_count,
+					    children8);
+		if (chars16_count == 0)
+			end_offset = &src_idx[chars8_count];
+		else {
+			src_idx = CONST_PTR_OFFSET(chars16_src,
+						   chars16_count *
+						   sizeof(uint16_t));
+			trie_map_node_save_children(level, src_idx,
+						    chars16_count, children16);
+			end_offset = &src_idx[chars16_count];
+		}
+
+		node->orig_size = ((const char *)end_offset -
+				   (const char *)trie->mmap_base) - offset;
+	}
+
+	*node_r = node;
+	return 0;
+}
+
+static void trie_close(struct squat_trie *trie)
+{
+	if (trie->mmap_base != NULL) {
+		if (munmap(trie->mmap_base, trie->mmap_size) < 0)
+			squat_trie_set_syscall_error(trie, "munmap()");
+		trie->mmap_base = NULL;
+	}
+	trie->mmap_size = 0;
+
+	if (trie->fd != -1) {
+		if (close(trie->fd) < 0)
+			squat_trie_set_syscall_error(trie, "close()");
+		trie->fd = -1;
+	}
+
+	trie->hdr = NULL;
+	trie->corrupted = FALSE;
+}
+
+static int trie_map(struct squat_trie *trie)
+{
+	struct stat st;
+
+	if (fstat(trie->fd, &st) < 0) {
+		squat_trie_set_syscall_error(trie, "fstat()");
+		return -1;
+	}
+	trie->dev = st.st_dev;
+	trie->ino = st.st_ino;
+
+	if (trie->mmap_base != NULL) {
+		if (munmap(trie->mmap_base, trie->mmap_size) < 0)
+			squat_trie_set_syscall_error(trie, "munmap()");
+	}
+	trie->mmap_size = st.st_size;
+
+	trie->mmap_base = mmap(NULL, trie->mmap_size, PROT_READ | PROT_WRITE,
+			       MAP_SHARED, trie->fd, 0);
+	if (trie->mmap_base == MAP_FAILED) {
+		trie->mmap_size = 0;
+		trie->mmap_base = NULL;
+		squat_trie_set_syscall_error(trie, "mmap()");
+		return -1;
+	}
+
+	trie->hdr = trie->mmap_base;
+	if (trie_map_node(trie, trie->hdr->root_offset, 1, &trie->root) < 0) {
+		trie_close(trie);
+		return 0;
+	}
+	trie->node_count = trie->hdr->node_count;
+	return 1;
+}
+
+static int trie_open(struct squat_trie *trie)
+{
+	trie->fd = open(trie->filepath, O_RDWR);
+	if (trie->fd == -1) {
+		if (errno == ENOENT)
+			return 0;
+
+		squat_trie_set_syscall_error(trie, "open()");
+		return -1;
+	}
+
+	return trie_map(trie);
+}
+
+struct squat_trie *squat_trie_open(const char *path)
+{
+	struct squat_trie *trie;
+	const char *uidlist_path;
+
+	trie = i_new(struct squat_trie, 1);
+	trie->fd = -1;
+	trie->filepath = i_strdup(path);
+	uidlist_path = t_strconcat(path, ".uids", NULL);
+	trie->uidlist = squat_uidlist_init(trie, uidlist_path);
+	trie->buf = buffer_create_dynamic(default_pool, 1024);
+	(void)trie_open(trie);
+	return trie;
+}
+
+void squat_trie_close(struct squat_trie *trie)
+{
+	buffer_free(trie->buf);
+	squat_uidlist_deinit(trie->uidlist);
+	i_free(trie->filepath);
+	i_free(trie);
+}
+
+static struct trie_node *
+node_alloc(uint16_t chr, unsigned int level)
+{
+	struct trie_node *node;
+	unsigned int idx_size, idx_offset = sizeof(*node);
+
+	idx_size = level < BLOCK_SIZE ?
+		sizeof(struct trie_node *) : sizeof(uint32_t);
+
+	if (chr < 256) {
+		uint8_t *chrp;
+
+		idx_offset += ALIGN(sizeof(*chrp));
+		node = i_malloc(idx_offset + idx_size);
+		node->chars_8bit_count = 1;
+
+		chrp = PTR_OFFSET(node, sizeof(*node));
+		*chrp = chr;
+	} else {
+		uint16_t *chrp;
+
+		idx_offset += ALIGN(sizeof(*chrp));
+		node = i_malloc(idx_offset + idx_size);
+		node->chars_16bit_count = 1;
+
+		chrp = PTR_OFFSET(node, sizeof(*node));
+		*chrp = chr;
+	}
+
+	node->resized = TRUE;
+	return node;
+}
+
+static struct trie_node *
+node_realloc(struct trie_node *node, uint32_t char_idx, uint16_t chr,
+	     unsigned int level)
+{
+	struct trie_node *new_node;
+	unsigned int old_size_8bit, old_size_16bit, old_idx_offset;
+	unsigned int idx_size, old_size, new_size, new_idx_offset;
+	unsigned int hole1_pos, hole2_pos, skip;
+
+	idx_size = level < BLOCK_SIZE ?
+		sizeof(struct trie_node *) : sizeof(uint32_t);
+
+	old_size_8bit = ALIGN(node->chars_8bit_count) +
+		node->chars_8bit_count * idx_size;
+	old_size_16bit = ALIGN(sizeof(uint16_t) * node->chars_16bit_count) +
+		node->chars_16bit_count * idx_size;
+	old_size = sizeof(*node) + old_size_8bit + old_size_16bit;
+
+	if (chr < 256) {
+		new_idx_offset = sizeof(*node) +
+			ALIGN(node->chars_8bit_count + sizeof(uint8_t));
+		new_size = new_idx_offset + old_size_16bit +
+			(node->chars_8bit_count + 1) * idx_size;
+	} else {
+		new_idx_offset = sizeof(*node) + old_size_8bit +
+			ALIGN((node->chars_16bit_count + 1) * sizeof(uint16_t));
+		new_size = new_idx_offset +
+			(node->chars_16bit_count + 1) * idx_size;
+	}
+
+	new_node = i_malloc(new_size);
+	if (chr < 256) {
+		hole1_pos = sizeof(*node) + char_idx;
+		old_idx_offset = sizeof(*node) + ALIGN(node->chars_8bit_count);
+	} else {
+		hole1_pos = sizeof(*node) + old_size_8bit +
+			char_idx * sizeof(uint16_t);
+		old_idx_offset = sizeof(*node) + old_size_8bit +
+			ALIGN(node->chars_16bit_count * sizeof(uint16_t));
+	}
+	hole2_pos = old_idx_offset + idx_size * char_idx;
+
+	memcpy(new_node, node, hole1_pos);
+	if (chr < 256) {
+		uint8_t *chrp = PTR_OFFSET(new_node, hole1_pos);
+		*chrp = chr;
+		new_node->chars_8bit_count++;
+
+		memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint8_t)),
+		       PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
+	} else {
+		uint16_t *chrp = PTR_OFFSET(new_node, hole1_pos);
+		*chrp = chr;
+		new_node->chars_16bit_count++;
+
+		memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint16_t)),
+		       PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
+	}
+
+	memcpy(PTR_OFFSET(new_node, new_idx_offset),
+	       PTR_OFFSET(node, old_idx_offset),
+	       hole2_pos - old_idx_offset);
+
+	skip = new_idx_offset - old_idx_offset;
+	memset(PTR_OFFSET(new_node, hole2_pos + skip), 0, idx_size);
+	skip += sizeof(uint32_t);
+	memcpy(PTR_OFFSET(new_node, hole2_pos + skip),
+	       PTR_OFFSET(node, hole2_pos),
+	       old_size - hole2_pos);
+
+	new_node->resized = TRUE;
+	i_free(node);
+	return new_node;
+}
+
+static int
+trie_insert_node(struct squat_trie *trie, struct trie_node **parent,
+		 const uint16_t *data, uint32_t uid, unsigned int level)
+{
+	struct trie_node *node = *parent;
+	uint32_t char_idx, idx_base_offset;
+	bool modified = FALSE;
+	int ret;
+
+	if (*data < 256) {
+		unsigned int count;
+
+		if (node == NULL) {
+			trie->node_count++;
+			node = *parent = node_alloc(*data, level);
+			char_idx = 0;
+			count = 1;
+			modified = TRUE;
+		} else {
+			uint8_t *chars = PTR_OFFSET(node, sizeof(*node));
+			uint8_t *pos;
+
+			count = node->chars_8bit_count;
+			pos = bsearch_insert_pos(data, chars, count,
+						 sizeof(chars[0]),
+						 chr_8bit_cmp);
+			char_idx = pos - chars;
+			if (char_idx == count || *pos != *data) {
+				node = node_realloc(node, char_idx,
+						    *data, level);
+				*parent = node;
+				modified = TRUE;
+				count++;
+			}
+		}
+		idx_base_offset = sizeof(*node) + ALIGN(count);
+	} else {
+		unsigned int offset = sizeof(*node);
+		unsigned int count;
+
+		if (node == NULL) {
+			trie->node_count++;
+			node = *parent = node_alloc(*data, level);
+			char_idx = 0;
+			count = 1;
+			modified = TRUE;
+		} else {
+			unsigned int idx_size;
+			uint16_t *chars, *pos;
+
+			idx_size = level < BLOCK_SIZE ?
+				sizeof(struct trie_node *) : sizeof(uint32_t);
+			offset += ALIGN(node->chars_8bit_count) +
+				idx_size * node->chars_8bit_count;
+			chars = PTR_OFFSET(node, offset);
+
+			count = node->chars_16bit_count;
+			pos = bsearch_insert_pos(data, chars, count,
+						 sizeof(chars[0]),
+						 chr_16bit_cmp);
+			char_idx = pos - chars;
+			if (char_idx == count || *pos != *data) {
+				node = node_realloc(node, char_idx,
+						    *data, level);
+				*parent = node;
+				modified = TRUE;
+				count++;
+			}
+		}
+
+		idx_base_offset = offset + ALIGN(sizeof(uint16_t) * count);
+	}
+
+	if (level < BLOCK_SIZE) {
+		struct trie_node **children = PTR_OFFSET(node, idx_base_offset);
+		size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);
+
+		if ((child_idx & 1) != 0) {
+			if (trie_map_node(trie, child_idx & ~1, level + 1,
+					  &children[char_idx]) < 0)
+				return -1;
+		}
+		ret = trie_insert_node(trie, &children[char_idx],
+				       data + 1, uid, level + 1);
+		if (ret < 0)
+			return -1;
+		if (ret > 0)
+			node->modified = TRUE;
+	} else {
+		uint32_t *uid_lists = PTR_OFFSET(node, idx_base_offset);
+		if (squat_uidlist_add(trie->uidlist, &uid_lists[char_idx],
+				      uid) < 0)
+			return -1;
+
+		node->modified = TRUE;
+	}
+	return modified ? 1 : 0;
+}
+
+static uint32_t
+trie_lookup_node(struct squat_trie *trie, struct trie_node *node,
+		 const uint16_t *data, unsigned int level)
+{
+	uint32_t char_idx, idx_base_offset;
+
+	if (*data < 256) {
+		const uint8_t *chars, *pos;
+		unsigned int count;
+
+		if (node == NULL)
+			return 0;
+
+		chars = CONST_PTR_OFFSET(node, sizeof(*node));
+		count = node->chars_8bit_count;
+		pos = bsearch(data, chars, count, sizeof(chars[0]),
+			      chr_8bit_cmp);
+		if (pos == NULL || *pos != *data)
+			return 0;
+
+		char_idx = pos - chars;
+		idx_base_offset = sizeof(*node) + ALIGN(count);
+	} else {
+		const uint16_t *chars, *pos;
+		unsigned int count, idx_size, offset;
+
+		if (node == NULL)
+			return 0;
+
+		idx_size = level < BLOCK_SIZE ?
+			sizeof(struct trie_node *) : sizeof(uint32_t);
+		offset = sizeof(*node) + ALIGN(node->chars_8bit_count) +
+			idx_size * node->chars_8bit_count;
+		chars = PTR_OFFSET(node, offset);
+
+		count = node->chars_16bit_count;
+		pos = bsearch(data, chars, count, sizeof(chars[0]),
+			      chr_16bit_cmp);
+		if (pos == NULL || *pos != *data)
+			return 0;
+
+		char_idx = pos - chars;
+		idx_base_offset = offset + ALIGN(sizeof(uint16_t) * count);
+	}
+
+	if (level < BLOCK_SIZE) {
+		struct trie_node **children = PTR_OFFSET(node, idx_base_offset);
+		size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);
+
+		if ((child_idx & 1) != 0) {
+			/* not mapped to memory yet. do it. */
+			if (trie_map_node(trie, child_idx & ~1, level + 1,
+					  &children[char_idx]) < 0)
+				return -1;
+		}
+
+		return trie_lookup_node(trie, children[char_idx],
+					data + 1, level + 1);
+	} else {
+		const uint32_t *uid_lists =
+			CONST_PTR_OFFSET(node, idx_base_offset);
+
+		return uid_lists[char_idx];
+	}
+}
+
+static bool block_want_add(const uint16_t *data)
+{
+	unsigned int i;
+
+	/* skip all blocks that contain spaces or control characters.
+	   no-one searches them anyway */
+	for (i = 0; i < BLOCK_SIZE; i++) {
+		if (data[i] == 0)
+			return FALSE;
+	}
+	return TRUE;
+}
+
+int squat_trie_add(struct squat_trie *trie, uint32_t uid,
+		   const void *data, size_t size)
+{
+	const uint16_t *str;
+	uint16_t buf[(BLOCK_SIZE-1)*2];
+	unsigned int i, tmp_size;
+
+	t_push();
+	str = data_normalize(data, size, trie->buf);
+
+	if (uid == trie->prev_uid) {
+		/* @UNSAFE: continue from last block */
+		memcpy(buf, trie->prev_added,
+		       sizeof(buf[0]) * trie->prev_added_size);
+		tmp_size = I_MIN(size, BLOCK_SIZE-1);
+		memcpy(buf + trie->prev_added_size, str,
+		       sizeof(buf[0]) * tmp_size);
+
+		tmp_size += trie->prev_added_size;
+		for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) {
+			if (block_want_add(buf+i)) {
+				if (trie_insert_node(trie, &trie->root,
+						     buf + i, uid, 1) < 0) {
+					t_pop();
+					return -1;
+				}
+			}
+		}
+
+		if (size < BLOCK_SIZE) {
+			trie->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1);
+			memcpy(trie->prev_added, buf + i,
+			       sizeof(buf[0]) * trie->prev_added_size);
+			t_pop();
+			return 0;
+		}
+	}
+
+	for (i = 0; i + BLOCK_SIZE <= size; i++) {
+		if (block_want_add(str+i)) {
+			if (trie_insert_node(trie, &trie->root,
+					     str + i, uid, 1) < 0) {
+				t_pop();
+				return -1;
+			}
+		}
+	}
+
+	trie->prev_added_size = I_MIN(size, BLOCK_SIZE-1);
+	memcpy(trie->prev_added, str + i,
+	       sizeof(trie->prev_added[0]) * trie->prev_added_size);
+	t_pop();
+	return 0;
+}
+
+static void node_pack_children(buffer_t *buf, struct trie_node **children,
+			       unsigned int count)
+{
+	unsigned int i;
+	size_t child_idx;
+	uint32_t idx;
+
+	for (i = 0; i < count; i++) {
+		child_idx = POINTER_CAST_TO(children[i], size_t);
+		if ((child_idx & 1) != 0)
+			idx = child_idx & ~1;
+		else
+			idx = children[i]->file_offset;
+		buffer_append(buf, &idx, sizeof(idx));
+	}
+}
+
+static void node_pack(buffer_t *buf, struct trie_node *node)
+{
+	uint8_t *chars8 = NODE_CHARS8(node);
+	uint16_t *chars16 = NODE_CHARS16(node);
+	struct trie_node **children8 = NODE_CHILDREN8(node);
+	struct trie_node **children16 = NODE_CHILDREN16(node);
+
+	buffer_set_used_size(buf, 0);
+	_squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
+			     (node->chars_16bit_count > 0 ? 1 : 0));
+	buffer_append(buf, chars8, node->chars_8bit_count);
+	node_pack_children(buf, children8, node->chars_8bit_count);
+
+	if (node->chars_16bit_count > 0) {
+		_squat_trie_pack_num(buf, node->chars_16bit_count);
+		buffer_append(buf, chars16,
+			      sizeof(*chars16) * node->chars_16bit_count);
+		node_pack_children(buf, children16, node->chars_16bit_count);
+	}
+}
+
+static int node_leaf_finish(struct squat_trie *trie, struct trie_node *node)
+{
+	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
+	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node);
+	unsigned int i;
+
+	for (i = 0; i < node->chars_8bit_count; i++) {
+		if (squat_uidlist_finish_list(trie->uidlist, &idx8[i]) < 0)
+			return -1;
+	}
+	for (i = 0; i < node->chars_16bit_count; i++) {
+		if (squat_uidlist_finish_list(trie->uidlist, &idx16[i]) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void node_pack_leaf(struct squat_trie *trie, struct trie_node *node)
+{
+	uint8_t *chars8 = NODE_CHARS8(node);
+	uint16_t *chars16 = NODE_CHARS16(node);
+	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
+	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node);
+
+	buffer_set_used_size(trie->buf, 0);
+	_squat_trie_pack_num(trie->buf, (node->chars_8bit_count << 1) |
+			     (node->chars_16bit_count > 0 ? 1 : 0));
+	buffer_append(trie->buf, chars8, node->chars_8bit_count);
+	buffer_append(trie->buf, idx8, sizeof(*idx8) * node->chars_8bit_count);
+
+	if (node->chars_16bit_count > 0) {
+		_squat_trie_pack_num(trie->buf, node->chars_16bit_count);
+		buffer_append(trie->buf, chars16,
+			      sizeof(*chars16) * node->chars_16bit_count);
+		buffer_append(trie->buf, idx16,
+			      sizeof(*idx16) * node->chars_16bit_count);
+	}
+}
+
+static int
+trie_write_node_children(struct squat_trie *trie, unsigned int level,
+			 struct trie_node **children, unsigned int count)
+{
+	unsigned int i;
+	size_t child_idx;
+
+	for (i = 0; i < count; i++) {
+		child_idx = POINTER_CAST_TO(children[i], size_t);
+		if ((child_idx & 1) == 0) {
+			if (trie_write_node(trie, level, children[i]) < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+static int trie_write_node(struct squat_trie *trie, unsigned int level,
+			   struct trie_node *node)
+{
+	uoff_t offset;
+
+	if (level < BLOCK_SIZE) {
+		struct trie_node **children8 = NODE_CHILDREN8(node);
+		struct trie_node **children16 = NODE_CHILDREN16(node);
+
+		trie_write_node_children(trie, level + 1,
+					 children8, node->chars_8bit_count);
+		trie_write_node_children(trie, level + 1,
+					 children16, node->chars_16bit_count);
+		node_pack(trie->buf, node);
+	} else {
+		if (node_leaf_finish(trie, node) < 0)
+			return -1;
+
+		node_pack_leaf(trie, node);
+	}
+
+	offset = trie->output->offset;
+	if ((offset & 1) != 0) {
+		o_stream_send(trie->output, "", 1);
+		offset++;
+	}
+
+	if (node->resized && node->orig_size != trie->buf->used) {
+		/* append to end of file. the parent node is written later. */
+		node->file_offset = offset;
+		o_stream_send(trie->output, trie->buf->data, trie->buf->used);
+
+		trie->deleted_space += node->orig_size;
+	} else if (node->modified) {
+		/* overwrite node's contents */
+		i_assert(node->file_offset != 0);
+		i_assert(trie->buf->used <= node->orig_size);
+
+		/* FIXME: write only the indexes if !node->resized */
+		o_stream_seek(trie->output, node->file_offset);
+		o_stream_send(trie->output, trie->buf->data, trie->buf->used);
+		o_stream_seek(trie->output, offset);
+
+		trie->deleted_space += trie->buf->used - node->orig_size;
+	}
+	return 0;
+}
+
+static int trie_nodes_write(struct squat_trie *trie, uint32_t *uidvalidity_r)
+{
+	struct squat_trie_header hdr;
+
+	if (trie->fd == -1) {
+		trie->fd = open(trie->filepath,
+				O_RDWR | O_CREAT | O_TRUNC, 0600);
+		if (trie->fd == -1) {
+			squat_trie_set_syscall_error(trie, "open()");
+			return -1;
+		}
+
+		memset(&hdr, 0, sizeof(hdr));
+		hdr.uidvalidity = 0; // FIXME
+		hdr.header_size = sizeof(hdr);
+	} else {
+		hdr = *trie->hdr;
+		if (lseek(trie->fd, hdr.used_file_size, SEEK_SET) < 0) {
+			squat_trie_set_syscall_error(trie, "lseek()");
+			return -1;
+		}
+	}
+
+	trie->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE);
+	if (hdr.used_file_size == 0)
+		o_stream_send(trie->output, &hdr, sizeof(hdr));
+
+	trie->deleted_space = 0;
+	if (trie_write_node(trie, 1, trie->root) < 0)
+		return -1;
+
+	/* update the header */
+	hdr.root_offset = trie->root->file_offset;
+	hdr.used_file_size = trie->output->offset;
+	hdr.deleted_space += trie->deleted_space;
+	hdr.node_count = trie->node_count;
+	o_stream_seek(trie->output, 0);
+	o_stream_send(trie->output, &hdr, sizeof(hdr));
+
+	o_stream_destroy(&trie->output);
+	*uidvalidity_r = hdr.uidvalidity;
+	return 0;
+}
+
+static bool squat_trie_need_compress(struct squat_trie *trie,
+				     unsigned int current_message_count)
+{
+	uint32_t max_del_space;
+
+	if (trie->hdr->used_file_size >= TRIE_COMPRESS_MIN_SIZE) {
+		/* see if we've reached the max. deleted space in file */
+		max_del_space = trie->hdr->used_file_size / 100 *
+			TRIE_COMPRESS_PERCENTAGE;
+		if (trie->hdr->deleted_space > max_del_space)
+			return TRUE;
+	}
+
+	return squat_uidlist_need_compress(trie->uidlist,
+					   current_message_count);
+}
+
+int squat_trie_flush(struct squat_trie *trie)
+{
+	uint32_t uidvalidity;
+
+	if (trie->root == NULL) {
+		/* nothing changed */
+		return 0;
+	}
+
+	if (trie_nodes_write(trie, &uidvalidity) < 0)
+		return -1;
+	if (squat_uidlist_flush(trie->uidlist, uidvalidity) < 0)
+		return -1;
+	if (trie_map(trie) <= 0)
+		return -1;
+
+	if (squat_trie_need_compress(trie, (unsigned int)-1)) {
+		if (squat_trie_compress(trie, NULL) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void squat_trie_compress_chars8(struct trie_node *node)
+{
+	uint8_t *chars = NODE_CHARS8(node);
+	struct trie_node **child_src = NODE_CHILDREN8(node);
+	struct trie_node **child_dest;
+	unsigned int i, j, old_count;
+
+	old_count = node->chars_8bit_count;
+	for (i = j = 0; i < old_count; i++) {
+		if (child_src[i] != NULL)
+			chars[j++] = chars[i];
+	}
+
+	node->chars_8bit_count = j;
+	child_dest = NODE_CHILDREN8(node);
+
+	for (i = j = 0; i < old_count; i++) {
+		if (child_src[i] != NULL)
+			child_dest[j++] = child_src[i];
+	}
+}
+
+static void squat_trie_compress_chars16(struct trie_node *node)
+{
+	uint16_t *chars = NODE_CHARS16(node);
+	struct trie_node **child_src = NODE_CHILDREN16(node);
+	struct trie_node **child_dest;
+	unsigned int i, j, old_count;
+
+	old_count = node->chars_16bit_count;
+	for (i = j = 0; i < old_count; i++) {
+		if (child_src[i] != NULL)
+			chars[j++] = chars[i];
+	}
+
+	node->chars_16bit_count = j;
+	child_dest = NODE_CHILDREN16(node);
+
+	for (i = j = 0; i < old_count; i++) {
+		if (child_src[i] != NULL)
+			child_dest[j++] = child_src[i];
+	}
+}
+
+static int
+squat_trie_compress_children(struct squat_trie *trie,
+			     struct trie_node **children, unsigned int count,
+			     unsigned int level)
+{
+	struct trie_node *child_node;
+	size_t child_idx;
+	unsigned int i;
+	int ret = 0;
+	bool need_char_compress = FALSE;
+
+	for (i = 0; i < count; i++) {
+		child_idx = POINTER_CAST_TO(children[i], size_t);
+		i_assert((child_idx & 1) != 0);
+		child_idx &= ~1;
+
+		if (trie_map_node(trie, child_idx, level, &child_node) < 0)
+			return -1;
+
+		ret = squat_trie_compress_node(trie, child_node, level);
+		if (child_node->file_offset != 0)
+			children[i] = POINTER_CAST(child_node->file_offset | 1);
+		else {
+			children[i] = NULL;
+			need_char_compress = TRUE;
+		}
+		i_free(child_node);
+
+		if (ret < 0)
+			return -1;
+	}
+	return need_char_compress ? 0 : 1;
+}
+
+static int squat_trie_compress_leaf_uidlist(struct squat_trie *trie,
+					    struct trie_node *node)
+{
+	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
+	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node);
+	unsigned int i;
+	int ret;
+	bool compress_chars = FALSE;
+
+	for (i = 0; i < node->chars_8bit_count; i++) {
+		ret = squat_uidlist_compress_next(trie->compress_ctx, &idx8[i]);
+		if (ret < 0)
+			return -1;
+		if (ret == 0) {
+			idx8[i] = 0;
+			compress_chars = TRUE;
+		}
+	}
+	if (compress_chars) {
+		squat_trie_compress_chars8(node);
+		compress_chars = FALSE;
+	}
+	for (i = 0; i < node->chars_16bit_count; i++) {
+		ret = squat_uidlist_compress_next(trie->compress_ctx,
+						  &idx16[i]);
+		if (ret < 0)
+			return -1;
+		if (ret == 0) {
+			idx16[i] = 0;
+			compress_chars = TRUE;
+		}
+	}
+	if (compress_chars) {
+		squat_trie_compress_chars16(node);
+		node->chars_16bit_count = i;
+	}
+	return 0;
+}
+
+static int
+squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node,
+			 unsigned int level)
+{
+	int ret;
+
+	if (level == BLOCK_SIZE) {
+		if (squat_trie_compress_leaf_uidlist(trie, node))
+			return -1;
+
+		if (node->chars_8bit_count == 0 &&
+		    node->chars_16bit_count == 0) {
+			/* everything expunged */
+			trie->node_count--;
+			node->file_offset = 0;
+			return 0;
+		}
+		node_pack_leaf(trie, node);
+	} else {
+		struct trie_node **children8 = NODE_CHILDREN8(node);
+		struct trie_node **children16 = NODE_CHILDREN16(node);
+
+		if ((ret = squat_trie_compress_children(trie, children8,
+							node->chars_8bit_count,
+							level + 1)) < 0)
+			return -1;
+		if (ret == 0)
+			squat_trie_compress_chars8(node);
+		if ((ret = squat_trie_compress_children(trie, children16,
+							node->chars_16bit_count,
+							level + 1)) < 0)
+			return -1;
+		if (ret == 0)
+			squat_trie_compress_chars16(node);
+
+		if (node->chars_8bit_count == 0 &&
+		    node->chars_16bit_count == 0) {
+			/* everything expunged */
+			trie->node_count--;
+			node->file_offset = 0;
+			return 0;
+		}
+		node_pack(trie->buf, node);
+	}
+
+	if ((trie->output->offset & 1) != 0)
+		o_stream_send(trie->output, "", 1);
+	node->file_offset = trie->output->offset;
+
+	o_stream_send(trie->output, trie->buf->data, trie->buf->used);
+	return 0;
+}
+
+int squat_trie_compress(struct squat_trie *trie,
+			const ARRAY_TYPE(seq_range) *existing_uids)
+{
+	struct trie_node *node;
+	struct squat_trie_header hdr;
+	const char *tmp_path;
+	int fd, ret;
+
+	if (trie_map_node(trie, trie->hdr->root_offset, 1, &node) < 0)
+		return -1;
+
+	tmp_path = t_strconcat(trie->filepath, ".tmp", NULL);
+	fd = open(tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+	if (fd == -1) {
+		i_error("open(%s, O_CREAT) failed: %m", tmp_path);
+		i_free(node);
+		return -1;
+	}
+
+	i_assert(trie->output == NULL);
+	trie->output = o_stream_create_file(fd, default_pool, 0, TRUE);
+
+	/* write a dummy header first */
+	memset(&hdr, 0, sizeof(hdr));
+	o_stream_send(trie->output, &hdr, sizeof(hdr));
+
+	/* do the compression */
+	trie->compress_ctx =
+		squat_uidlist_compress_begin(trie->uidlist, existing_uids);
+	ret = squat_trie_compress_node(trie, node, 1);
+
+	/* write the header */
+	hdr.uidvalidity = trie->hdr->uidvalidity;
+	hdr.header_size = sizeof(hdr);
+	hdr.root_offset = node->file_offset;
+	hdr.used_file_size = trie->output->offset;
+	hdr.node_count = trie->node_count;
+	o_stream_seek(trie->output, 0);
+	o_stream_send(trie->output, &hdr, sizeof(hdr));
+
+	i_free(node);
+
+	/* finish the compression */
+	if (ret == 0)
+		ret = squat_uidlist_compress_commit(&trie->compress_ctx);
+	else
+		squat_uidlist_compress_rollback(&trie->compress_ctx);
+
+	if (ret == 0) {
+		if (rename(tmp_path, trie->filepath) < 0) {
+			i_error("rename(%s, %s) failed: %m",
+				tmp_path, trie->filepath);
+			ret = -1;
+		}
+	}
+	o_stream_destroy(&trie->output);
+
+	if (ret < 0)
+		(void)unlink(tmp_path);
+	else {
+		trie_close(trie);
+		if (trie_open(trie) <= 0)
+			ret = -1;
+	}
+	return ret;
+}
+
+int squat_trie_mark_having_expunges(struct squat_trie *trie,
+				    const ARRAY_TYPE(seq_range) *existing_uids,
+				    unsigned int current_message_count)
+{
+	bool compress;
+	int ret;
+
+	compress = squat_trie_need_compress(trie, current_message_count);
+	ret = squat_uidlist_mark_having_expunges(trie->uidlist, compress);
+
+	if (compress)
+		ret = squat_trie_compress(trie, existing_uids);
+	return ret;
+}
+
+size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r)
+{
+	*count_r = trie->hdr->node_count;
+
+	return trie->mmap_size;
+}
+
+int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+		      const char *str)
+{
+	const uint16_t *data;
+	unsigned int len = strlen(str);
+	uint32_t list;
+
+	if (len < BLOCK_SIZE)
+		return -1;
+
+	data = data_normalize(str, len, trie->buf);
+	list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1);
+	if (list == 0)
+		return 0;
+
+	if (squat_uidlist_get(trie->uidlist, list, result) < 0) {
+		squat_trie_set_corrupted(trie, "uidlist offset broken");
+		return -1;
+	}
+	while (len > BLOCK_SIZE) {
+		len--;
+		list = trie_lookup_node(trie, trie->root,
+					data + len - BLOCK_SIZE, 1);
+		if (list == 0) {
+			array_clear(result);
+			return 0;
+		}
+		if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
+			squat_trie_set_corrupted(trie, "uidlist offset broken");
+			return -1;
+		}
+	}
+	return array_count(result) > 0 ? 1 : 0;
+}
+
+int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+		      const char *str)
+{
+	const uint16_t *data;
+	unsigned int len = strlen(str);
+	uint32_t list;
+
+	if (len < BLOCK_SIZE)
+		return -1;
+
+	data = data_normalize(str, len, trie->buf);
+	while (len >= BLOCK_SIZE) {
+		list = trie_lookup_node(trie, trie->root,
+					data + len - BLOCK_SIZE, 1);
+		if (list == 0) {
+			array_clear(result);
+			return 0;
+		}
+		if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
+			squat_trie_set_corrupted(trie, "uidlist offset broken");
+			return -1;
+		}
+		len--;
+	}
+	return array_count(result) > 0 ? 1 : 0;
+}
+
+struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
+{
+	return trie->uidlist;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/squat-trie.h	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,33 @@
+#ifndef __SQUAT_TRIE_H
+#define __SQUAT_TRIE_H
+
+#include "seq-range-array.h"
+
+struct squat_trie *squat_trie_open(const char *path);
+void squat_trie_close(struct squat_trie *trie);
+
+int squat_trie_add(struct squat_trie *trie, uint32_t uid,
+		   const void *data, size_t size);
+int squat_trie_flush(struct squat_trie *trie);
+int squat_trie_compress(struct squat_trie *trie,
+			const ARRAY_TYPE(seq_range) *existing_uids);
+
+int squat_trie_mark_having_expunges(struct squat_trie *trie,
+				    const ARRAY_TYPE(seq_range) *existing_uids,
+				    unsigned int current_message_count);
+
+int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+		      const char *str);
+int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
+		      const char *str);
+
+size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r);
+
+struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie);
+
+void _squat_trie_pack_num(buffer_t *buffer, uint32_t num);
+uint32_t _squat_trie_unpack_num(const uint8_t **p, const uint8_t *end);
+
+void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/squat-uidlist.c	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,889 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+#include "lib.h"
+#include "array.h"
+#include "ostream.h"
+#include "mmap-util.h"
+#include "write-full.h"
+#include "squat-trie.h"
+#include "squat-uidlist.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#define UIDLIST_COMPRESS_PERCENTAGE 30
+#define UIDLIST_UID_COMPRESS_PERCENTAGE 20
+#define UIDLIST_COMPRESS_MIN_SIZE (1024*8)
+
+#define UID_NODE_PREV_FLAG_OLD 0x00000001
+#define UID_LIST_IDX_FLAG_SINGLE 0x80000000
+
+struct squat_uidlist_header {
+	uint32_t uidvalidity; // FIXME
+	uint32_t header_size;
+	uint32_t used_file_size;
+	uint32_t deleted_space;
+
+	uint32_t uid_max;
+	uint32_t uid_count;
+	uint32_t uids_expunged;
+	uint32_t node_count;
+};
+
+struct uid_node {
+	struct uid_node *prev;
+	uint32_t uid;
+};
+
+struct squat_uidlist_get_context {
+	struct squat_uidlist *uidlist;
+
+	ARRAY_TYPE(seq_range) *result;
+
+	uint32_t filter_pos;
+};
+
+struct squat_uidlist {
+	struct squat_trie *trie;
+
+	char *filepath;
+	int fd;
+	struct ostream *output;
+
+	void *mmap_base;
+	size_t mmap_size;
+	struct squat_uidlist_header hdr;
+
+	ARRAY_DEFINE(lists, struct uid_node);
+	uint32_t first_new_list_idx;
+
+	pool_t node_pool;
+	buffer_t *tmp_buf, *list_buf;
+
+	unsigned int check_expunges:1;
+	unsigned int write_failed:1;
+};
+
+struct squat_uidlist_compress_ctx {
+	struct squat_uidlist *uidlist;
+	const ARRAY_TYPE(seq_range) *existing_uids;
+
+	struct ostream *output;
+	char *tmp_path;
+
+	pool_t node_pool;
+	struct uid_node *last_node;
+	ARRAY_TYPE(seq_range) seen_uids;
+
+	struct squat_uidlist_header hdr;
+
+	unsigned int seen_expunged:1;
+	unsigned int failed:1;
+};
+
+static void
+squat_uidlist_set_syscall_error(struct squat_uidlist *uidlist,
+				const char *function)
+{
+	i_error("%s failed with index search uidlist file %s: %m",
+		function, uidlist->filepath);
+}
+
+static int squat_uidlist_map(struct squat_uidlist *uidlist)
+{
+	struct stat st;
+
+	if (fstat(uidlist->fd, &st) < 0) {
+		squat_uidlist_set_syscall_error(uidlist, "fstat()");
+		return -1;
+	}
+
+	if (st.st_size <= sizeof(uidlist->hdr)) {
+		memset(&uidlist->hdr, 0, sizeof(uidlist->hdr));
+		uidlist->hdr.header_size = sizeof(uidlist->hdr);
+		uidlist->hdr.used_file_size = sizeof(uidlist->hdr);
+		return 0;
+	}
+
+	if (uidlist->mmap_base != NULL) {
+		if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
+			squat_uidlist_set_syscall_error(uidlist, "munmap()");
+	}
+	uidlist->mmap_size = st.st_size;
+
+	uidlist->mmap_base =
+		mmap(NULL, uidlist->mmap_size, PROT_READ | PROT_WRITE,
+		     MAP_SHARED, uidlist->fd, 0);
+	if (uidlist->mmap_base == MAP_FAILED) {
+		uidlist->mmap_size = 0;
+		uidlist->mmap_base = NULL;
+		squat_uidlist_set_syscall_error(uidlist, "mmap()");
+		return -1;
+	}
+
+	memcpy(&uidlist->hdr, uidlist->mmap_base, sizeof(uidlist->hdr));
+	// FIXME: verify header
+
+	if (uidlist->hdr.uids_expunged)
+		uidlist->check_expunges = TRUE;
+
+	uidlist->first_new_list_idx = uidlist->mmap_size;
+	return 1;
+}
+
+static int squat_uidlist_open(struct squat_uidlist *uidlist)
+{
+	i_assert(uidlist->fd == -1);
+
+	uidlist->fd = open(uidlist->filepath, O_RDWR | O_CREAT, 0600);
+	if (uidlist->fd == -1) {
+		squat_uidlist_set_syscall_error(uidlist, "open()");
+		return -1;
+	}
+
+	return squat_uidlist_map(uidlist);
+}
+
+static void squat_uidlist_close(struct squat_uidlist *uidlist)
+{
+	if (uidlist->mmap_base != NULL) {
+		if (munmap(uidlist->mmap_base, uidlist->mmap_size) < 0)
+			squat_uidlist_set_syscall_error(uidlist, "munmap()");
+		uidlist->mmap_base = NULL;
+	}
+	uidlist->mmap_size = 0;
+
+	if (uidlist->fd != -1) {
+		if (close(uidlist->fd) < 0)
+			squat_uidlist_set_syscall_error(uidlist, "close()");
+		uidlist->fd = -1;
+	}
+}
+
+struct squat_uidlist *
+squat_uidlist_init(struct squat_trie *trie, const char *path)
+{
+	struct squat_uidlist *uidlist;
+
+	uidlist = i_new(struct squat_uidlist, 1);
+	uidlist->trie = trie;
+	uidlist->filepath = i_strdup(path);
+	uidlist->fd = -1;
+	uidlist->first_new_list_idx = 1;
+	i_array_init(&uidlist->lists, 65536);
+	uidlist->node_pool =
+		pool_alloconly_create("squat uidlist node pool", 65536);
+	uidlist->tmp_buf = buffer_create_dynamic(default_pool, 16);
+	uidlist->list_buf = buffer_create_dynamic(default_pool, 256);
+	(void)squat_uidlist_open(uidlist);
+	return uidlist;
+}
+
+void squat_uidlist_deinit(struct squat_uidlist *uidlist)
+{
+	squat_uidlist_close(uidlist);
+
+	pool_unref(uidlist->node_pool);
+	array_free(&uidlist->lists);
+	buffer_free(uidlist->tmp_buf);
+	buffer_free(uidlist->list_buf);
+	i_free(uidlist);
+}
+
+int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *_uid_list_idx,
+		      uint32_t uid)
+{
+	uint32_t uid_list_idx = *_uid_list_idx;
+	struct uid_node *node, *old_node;
+
+	i_assert(uid >= uidlist->hdr.uid_max);
+
+	if (uid_list_idx == 0) {
+		*_uid_list_idx = uid | UID_LIST_IDX_FLAG_SINGLE;
+		return 0;
+	}
+
+	if (uid > uidlist->hdr.uid_max) {
+		uidlist->hdr.uid_max = uid;
+		uidlist->hdr.uid_count++;
+	}
+
+	if (uid_list_idx < uidlist->first_new_list_idx) {
+		/* continue an existing list in the uidlist file */
+		old_node = POINTER_CAST((uid_list_idx << 1) |
+					UID_NODE_PREV_FLAG_OLD);
+		uid_list_idx = uidlist->first_new_list_idx +
+			array_count(&uidlist->lists);
+		node = array_append_space(&uidlist->lists);
+
+		uidlist->hdr.node_count++;
+	} else if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
+		uint32_t old_uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
+
+		if (uid == old_uid) {
+			/* trying to add the same uid again */
+			return 0;
+		}
+
+		/* convert single UID to a list */
+		old_node = p_new(uidlist->node_pool, struct uid_node, 1);
+		old_node->uid = old_uid;
+
+		uid_list_idx = uidlist->first_new_list_idx +
+			array_count(&uidlist->lists);
+		node = array_append_space(&uidlist->lists);
+
+		uidlist->hdr.node_count++;
+	} else {
+		/* update an in-memory list */
+		uint32_t arr_idx = uid_list_idx - uidlist->first_new_list_idx;
+		if (arr_idx >= array_count(&uidlist->lists)) {
+			/* broken */
+			squat_trie_set_corrupted(uidlist->trie,
+				"corrupted uidlist index (adding)");
+			return -1;
+		}
+
+		node = array_idx_modifiable(&uidlist->lists, arr_idx);
+		if (node->uid == uid) {
+			/* trying to add the same uid again */
+			return 0;
+		}
+
+		old_node = p_new(uidlist->node_pool, struct uid_node, 1);
+		*old_node = *node;
+	}
+
+	node->prev = old_node;
+	node->uid = uid;
+	*_uid_list_idx = uid_list_idx;
+	return 0;
+}
+
+static int
+squat_uidlist_copy_existing(struct squat_uidlist *uidlist,  size_t offset,
+			    uint32_t *prev_uid_r, uint32_t *written_uid_r)
+{
+	const uint8_t *data, *data_start, *end, *p = NULL;
+	uint32_t size, num, prev_uid, next_uid;
+
+	if (offset >= uidlist->mmap_size)
+		return -1;
+
+	data = CONST_PTR_OFFSET(uidlist->mmap_base, offset);
+	end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);
+
+	size = _squat_trie_unpack_num(&data, end);
+	if (data + size > end)
+		return -1;
+
+	data_start = data;
+	end = data + size;
+
+	prev_uid = next_uid = _squat_trie_unpack_num(&data, end);
+	p = data;
+	while (data != end) {
+		num = _squat_trie_unpack_num(&data, end);
+		next_uid = prev_uid + (num >> 1) + 1;
+
+		if ((num & 1) != 0) {
+			/* prev_uid..next_uid */
+			if (data == end) {
+				/* try to increase this range */
+				break;
+			}
+
+			/* beginning a new uid/range */
+			num = _squat_trie_unpack_num(&data, end);
+			next_uid += num + 1;
+
+			prev_uid = next_uid;
+			p = data;
+		}
+
+		prev_uid = next_uid;
+		p = data;
+	}
+
+	*written_uid_r = prev_uid;
+	*prev_uid_r = next_uid;
+
+	uidlist->hdr.deleted_space +=
+		(end - (const uint8_t *)uidlist->mmap_base) - offset;
+
+	buffer_append(uidlist->list_buf, data_start, p - data_start);
+	return 0;
+}
+
+static int
+squat_uidlist_write_range(struct squat_uidlist *uidlist,
+			  const struct uid_node *node,
+			  uint32_t *prev_uid_r, uint32_t *written_uid_r,
+			  int level)
+{
+	buffer_t *buffer = uidlist->list_buf;
+	uint32_t written_uid, prev_uid;
+	uint32_t prev_idx = POINTER_CAST_TO(node->prev, uint32_t);
+
+	*prev_uid_r = node->uid;
+
+	if (node->prev == NULL) {
+		/* first UID */
+		_squat_trie_pack_num(buffer, node->uid);
+	} else {
+		if ((prev_idx & UID_NODE_PREV_FLAG_OLD) != 0) {
+			prev_idx >>= 1;
+			if (squat_uidlist_copy_existing(uidlist, prev_idx,
+							&prev_uid,
+							&written_uid) < 0 ||
+			    prev_uid >= node->uid) {
+				squat_trie_set_corrupted(uidlist->trie,
+					"corrupted continued uidlist index");
+				return -1;
+			}
+		} else {
+			if (squat_uidlist_write_range(uidlist, node->prev,
+						      &prev_uid, &written_uid,
+						      level+1) < 0)
+				return -1;
+		}
+
+		/* prev_uid contains the previous node's UID.
+		   written_uid contains the last written UID. */
+		if (prev_uid + 1 == node->uid) {
+			if (level != 0) {
+				/* this node continue the range */
+				*written_uid_r = written_uid;
+				return 0;
+			} else {
+				/* finishing range */
+				_squat_trie_pack_num(buffer, 1 |
+					((node->uid - written_uid - 1) << 1));
+				return 0;
+			}
+		}
+		i_assert(prev_uid < node->uid);
+		if (written_uid != prev_uid) {
+			i_assert(written_uid < prev_uid);
+
+			/* range ends at prev_uid */
+			_squat_trie_pack_num(buffer, 1 |
+				((prev_uid - written_uid - 1) << 1));
+			/* next uid/range */
+			_squat_trie_pack_num(buffer, node->uid - prev_uid - 1);
+		} else {
+			/* no range */
+			_squat_trie_pack_num(buffer,
+					     ((node->uid - prev_uid - 1) << 1));
+		}
+	}
+
+	*written_uid_r = node->uid;
+	return 0;
+}
+
+static void squat_uidlist_write_init(struct squat_uidlist *uidlist)
+{
+	i_assert(uidlist->output == NULL);
+
+	uidlist->output = o_stream_create_file(uidlist->fd, default_pool,
+					       0, FALSE);
+	if (uidlist->hdr.used_file_size < sizeof(uidlist->hdr)) {
+		/* creating a new file, write a dummy header */
+		o_stream_seek(uidlist->output, 0);
+		o_stream_send(uidlist->output, &uidlist->hdr,
+			      sizeof(uidlist->hdr));
+	} else {
+		o_stream_seek(uidlist->output,
+			      uidlist->hdr.used_file_size);
+	}
+}
+
+static int squat_uidlist_write_listbuf(struct squat_uidlist *uidlist,
+				       struct ostream *output)
+{
+	/* write size + buffer */
+	buffer_set_used_size(uidlist->tmp_buf, 0);
+	_squat_trie_pack_num(uidlist->tmp_buf, uidlist->list_buf->used);
+
+	if (o_stream_send(output, uidlist->tmp_buf->data,
+			  uidlist->tmp_buf->used) < 0 ||
+	    o_stream_send(output, uidlist->list_buf->data,
+			  uidlist->list_buf->used) < 0) {
+		return -1;
+	}
+	return 0;
+}
+
+int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
+			      uint32_t *_uid_list_idx)
+{
+	uint32_t uid_list_idx = *_uid_list_idx;
+	const struct uid_node *node;
+	uint32_t prev_uid, written_uid;
+
+	if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
+		/* this is a single UID "list" */
+		return 0;
+	}
+	if (uid_list_idx < uidlist->first_new_list_idx) {
+		/* the list hasn't changed */
+		return 0;
+	}
+
+	uid_list_idx -= uidlist->first_new_list_idx;
+	if (uid_list_idx >= array_count(&uidlist->lists)) {
+		/* broken */
+		squat_trie_set_corrupted(uidlist->trie,
+					 "corrupted uidlist index (finishing)");
+		return -1;
+	}
+
+	/* write the uidlist into a buffer */
+	node = array_idx(&uidlist->lists, uid_list_idx);
+	buffer_set_used_size(uidlist->list_buf, 0);
+	if (squat_uidlist_write_range(uidlist, node,
+				      &prev_uid, &written_uid, 0) < 0) {
+		uidlist->write_failed = TRUE;
+		return -1;
+	}
+
+	if (uidlist->output == NULL)
+		squat_uidlist_write_init(uidlist);
+
+	/* new uidlist index is the offset in uidlist file */
+	*_uid_list_idx = uidlist->output->offset;
+
+	if (squat_uidlist_write_listbuf(uidlist, uidlist->output) < 0)
+		uidlist->write_failed = TRUE;
+	return 0;
+}
+
+static void squat_uidlist_write_header(struct squat_uidlist *uidlist)
+{
+	uidlist->hdr.used_file_size = uidlist->output->offset;
+
+	o_stream_seek(uidlist->output, 0);
+	o_stream_send(uidlist->output, &uidlist->hdr, sizeof(uidlist->hdr));
+}
+
+int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity)
+{
+	int ret = uidlist->write_failed ? -1 : 0;
+
+	if (uidlist->output != NULL) {
+		if (ret == 0) {
+			uidlist->hdr.uidvalidity = uid_validity;
+			squat_uidlist_write_header(uidlist);
+		}
+		o_stream_destroy(&uidlist->output);
+	}
+
+	array_clear(&uidlist->lists);
+	p_clear(uidlist->node_pool);
+
+	uidlist->write_failed = FALSE;
+
+	(void)squat_uidlist_map(uidlist);
+	return ret;
+}
+
+bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
+				 unsigned int current_message_count)
+{
+	uint32_t max_del_space, max_uid_del_count;
+
+	if (uidlist->hdr.used_file_size >= UIDLIST_COMPRESS_MIN_SIZE) {
+		/* see if we've reached the max. deleted space in file */
+		max_del_space = uidlist->hdr.used_file_size / 100 *
+			UIDLIST_COMPRESS_PERCENTAGE;
+		if (uidlist->hdr.deleted_space > max_del_space)
+			return TRUE;
+	}
+	if (uidlist->hdr.uid_count > current_message_count) {
+		if (current_message_count == 0)
+			return TRUE;
+
+		max_uid_del_count = uidlist->hdr.uid_count *
+			UIDLIST_UID_COMPRESS_PERCENTAGE / 100;
+		if ((uidlist->hdr.uid_count - current_message_count) >
+		    max_uid_del_count)
+			return TRUE;
+	}
+	return FALSE;
+}
+
+int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
+				       bool update_disk)
+{
+	uidlist->check_expunges = TRUE;
+
+	if (update_disk) {
+		uidlist->hdr.uids_expunged = TRUE;
+
+		// FIXME: make sure uidlist.hdr is in updated state
+		if (pwrite_full(uidlist->fd, &uidlist->hdr,
+				sizeof(uidlist->hdr), 0) < 0) {
+			squat_uidlist_set_syscall_error(uidlist,
+							"pwrite_full()");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+struct squat_uidlist_compress_ctx *
+squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
+			     const ARRAY_TYPE(seq_range) *existing_uids)
+{
+	struct squat_uidlist_compress_ctx *ctx;
+	int fd;
+
+	ctx = i_new(struct squat_uidlist_compress_ctx, 1);
+	ctx->uidlist = uidlist;
+	ctx->tmp_path = i_strconcat(uidlist->filepath, ".tmp", NULL);
+
+	if (existing_uids != NULL) {
+		ctx->node_pool = pool_alloconly_create("compress node pool",
+						       1024);
+		ctx->existing_uids = existing_uids;
+		i_array_init(&ctx->seen_uids,
+			     I_MIN(128, array_count(existing_uids)));
+	}
+
+	fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+	if (fd == -1) {
+		ctx->failed = TRUE;
+		i_error("open(%s) failed: %m", ctx->tmp_path);
+	} else {
+		ctx->output = o_stream_create_file(fd, default_pool, 0, TRUE);
+		o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr));
+	}
+	return ctx;
+}
+
+static bool
+squat_uidlist_is_expunged(struct squat_uidlist_compress_ctx *ctx, uint32_t uid)
+{
+	if (ctx->existing_uids == NULL)
+		return FALSE;
+
+	return !seq_range_exists(ctx->existing_uids, uid);
+}
+
+static void
+squat_uidlist_compress_add_uid(struct squat_uidlist_compress_ctx *ctx,
+			       uint32_t uid)
+{
+	struct uid_node *node;
+
+	if (squat_uidlist_is_expunged(ctx, uid)) {
+		ctx->seen_expunged = TRUE;
+		return;
+	}
+
+	if (!seq_range_exists(&ctx->seen_uids, uid)) {
+		if (uid > ctx->hdr.uid_max)
+			ctx->hdr.uid_max = uid;
+		ctx->hdr.uid_count++;
+		seq_range_array_add(&ctx->seen_uids, 0, uid);
+	}
+
+	node = p_new(ctx->node_pool, struct uid_node, 1);
+	node->prev = ctx->last_node;
+	node->uid = uid;
+
+	ctx->last_node = node;
+}
+
+static int
+squat_uidlist_remove_expunged(struct squat_uidlist_compress_ctx *ctx,
+			      const uint8_t *data, size_t size,
+			      bool *all_expunged_r)
+{
+	const uint8_t *end;
+	uint32_t num, prev_uid, next_uid, written_uid;
+
+	end = data + size;
+
+	p_clear(ctx->node_pool);
+	ctx->seen_expunged = FALSE;
+	ctx->last_node = NULL;
+
+	prev_uid = _squat_trie_unpack_num(&data, end);
+	squat_uidlist_compress_add_uid(ctx, prev_uid);
+
+	while (data != end) {
+		num = _squat_trie_unpack_num(&data, end);
+		next_uid = prev_uid + (num >> 1) + 1;
+		if ((num & 1) != 0) {
+			for (prev_uid++; prev_uid <= next_uid; prev_uid++)
+				squat_uidlist_compress_add_uid(ctx, prev_uid);
+
+			if (data == end)
+				break;
+			num = _squat_trie_unpack_num(&data, end);
+			next_uid += num + 1;
+		}
+		squat_uidlist_compress_add_uid(ctx, next_uid);
+		prev_uid = next_uid;
+	}
+
+	if (!ctx->seen_expunged) {
+		/* no changes */
+		return 0;
+	}
+	if (ctx->last_node == NULL) {
+		/* everything expunged */
+		*all_expunged_r = TRUE;
+		return 1;
+	}
+
+	/* recreate the list and write it */
+	buffer_set_used_size(ctx->uidlist->list_buf, 0);
+	if (squat_uidlist_write_range(ctx->uidlist, ctx->last_node,
+				      &prev_uid, &written_uid, 0) < 0)
+		return -1;
+	if (squat_uidlist_write_listbuf(ctx->uidlist, ctx->output) < 0)
+		return -1;
+	*all_expunged_r = FALSE;
+	return 1;
+}
+
+int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
+				uint32_t *uid_list_idx)
+{
+	struct squat_uidlist *uidlist = ctx->uidlist;
+	const uint8_t *data, *p, *end;
+	uint32_t size;
+	int ret;
+
+	if ((*uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
+		uint32_t uid = *uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
+
+		if (ctx->uidlist->check_expunges) {
+			if (squat_uidlist_is_expunged(ctx, uid))
+				return 0;
+		}
+		return 1;
+	}
+
+	if (ctx->output == NULL)
+		return -1;
+
+	if (*uid_list_idx >= uidlist->mmap_size) {
+		squat_trie_set_corrupted(uidlist->trie,
+			"uidlist index points outside file (compressing)");
+		return -1;
+	}
+
+	data = p = CONST_PTR_OFFSET(uidlist->mmap_base, *uid_list_idx);
+	end = CONST_PTR_OFFSET(uidlist->mmap_base, uidlist->mmap_size);
+
+	size = _squat_trie_unpack_num(&p, end);
+	if (data + size > end) {
+		squat_trie_set_corrupted(uidlist->trie,
+			"corrupted uidlist index (compressing)");
+		return -1;
+	}
+
+	*uid_list_idx = ctx->output->offset;
+
+	if (!ctx->uidlist->check_expunges)
+		ret = 0;
+	else {
+		bool all_expunged;
+
+		ret = squat_uidlist_remove_expunged(ctx, p, size,
+						    &all_expunged);
+		if (ret < 0) {
+			ctx->failed = TRUE;
+			return -1;
+		}
+		if (ret > 0 && all_expunged)
+			return 0;
+	}
+
+	if (ret == 0) {
+		if (o_stream_send(ctx->output, data, p - data + size) < 0) {
+			ctx->failed = TRUE;
+			return -1;
+		}
+	}
+
+	ctx->hdr.node_count++;
+	return 1;
+}
+
+void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **_ctx)
+{
+	struct squat_uidlist_compress_ctx *ctx = *_ctx;
+
+	*_ctx = NULL;
+
+	if (ctx->node_pool != NULL)
+		pool_unref(ctx->node_pool);
+	if (array_is_created(&ctx->seen_uids))
+		array_free(&ctx->seen_uids);
+	if (ctx->output != NULL) {
+		if (ctx->failed)
+			(void)unlink(ctx->tmp_path);
+		o_stream_destroy(&ctx->output);
+	}
+	i_free(ctx->tmp_path);
+	i_free(ctx);
+}
+
+int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **_ctx)
+{
+	struct squat_uidlist_compress_ctx *ctx = *_ctx;
+	int ret = 0;
+
+	if (ctx->failed) {
+		squat_uidlist_compress_rollback(_ctx);
+		return -1;
+	}
+
+	/* write the header */
+	ctx->hdr.uidvalidity = ctx->uidlist->hdr.uidvalidity;
+	ctx->hdr.header_size = sizeof(ctx->hdr);
+	ctx->hdr.used_file_size = ctx->output->offset;
+
+	if (ctx->existing_uids == NULL) {
+		ctx->hdr.uid_max = ctx->uidlist->hdr.uid_max;
+		ctx->hdr.uid_count = ctx->uidlist->hdr.uid_count;
+	}
+
+	o_stream_seek(ctx->output, 0);
+	if (o_stream_send(ctx->output, &ctx->hdr, sizeof(ctx->hdr)) < 0)
+		ret = -1;
+
+	if (ret == 0) {
+		if (rename(ctx->tmp_path, ctx->uidlist->filepath) < 0) {
+			i_error("rename(%s, %s) failed: %m",
+				ctx->tmp_path, ctx->uidlist->filepath);
+			ret = -1;
+		} else {
+			/* reopen */
+			ctx->uidlist->check_expunges = FALSE;
+			squat_uidlist_close(ctx->uidlist);
+			(void)squat_uidlist_open(ctx->uidlist);
+		}
+	}
+
+	if (ret < 0)
+		ctx->failed = TRUE;
+
+	squat_uidlist_compress_rollback(_ctx);
+	return ret;
+}
+
+static void
+squat_uidlist_get_add_uid(struct squat_uidlist_get_context *ctx, uint32_t uid)
+{
+	if (ctx->filter_pos == 0) {
+		seq_range_array_add(ctx->result, 0, uid);
+		return;
+	}
+
+	for (; ctx->filter_pos < uid; ctx->filter_pos++)
+		seq_range_array_remove(ctx->result, ctx->filter_pos);
+	ctx->filter_pos++;
+}
+
+static int
+squat_uidlist_get_range_list(struct squat_uidlist_get_context *ctx,
+			     size_t offset)
+{
+	const uint8_t *data, *end;
+	uint32_t size, num, prev_uid, next_uid;
+
+	if (offset >= ctx->uidlist->mmap_size)
+		return -1;
+
+	data = CONST_PTR_OFFSET(ctx->uidlist->mmap_base, offset);
+	end = CONST_PTR_OFFSET(ctx->uidlist->mmap_base,
+			       ctx->uidlist->mmap_size);
+
+	size = _squat_trie_unpack_num(&data, end);
+	if (data + size > end)
+		return -1;
+
+	end = data + size;
+
+	prev_uid = _squat_trie_unpack_num(&data, end);
+	squat_uidlist_get_add_uid(ctx, prev_uid);
+
+	while (data != end) {
+		num = _squat_trie_unpack_num(&data, end);
+		next_uid = prev_uid + (num >> 1) + 1;
+		if ((num & 1) != 0) {
+			for (prev_uid++; prev_uid <= next_uid; prev_uid++)
+				squat_uidlist_get_add_uid(ctx, prev_uid);
+
+			if (data == end)
+				break;
+			num = _squat_trie_unpack_num(&data, end);
+			next_uid += num + 1;
+		}
+		squat_uidlist_get_add_uid(ctx, next_uid);
+		prev_uid = next_uid;
+	}
+	return 0;
+}
+
+static int
+squat_uidlist_get_ctx(struct squat_uidlist_get_context *ctx,
+		      uint32_t uid_list_idx)
+{
+	if ((uid_list_idx & UID_LIST_IDX_FLAG_SINGLE) != 0) {
+		uint32_t uid = uid_list_idx & ~UID_LIST_IDX_FLAG_SINGLE;
+		squat_uidlist_get_add_uid(ctx, uid);
+		return 0;
+	}
+
+	return squat_uidlist_get_range_list(ctx, uid_list_idx);
+}
+
+int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
+		      ARRAY_TYPE(seq_range) *result)
+{
+	struct squat_uidlist_get_context ctx;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.uidlist = uidlist;
+	ctx.result = result;
+
+	return squat_uidlist_get_ctx(&ctx, uid_list_idx);
+}
+
+int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
+			 ARRAY_TYPE(seq_range) *result)
+{
+	struct squat_uidlist_get_context ctx;
+	const struct seq_range *range;
+	unsigned int count;
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.uidlist = uidlist;
+	ctx.result = result;
+	ctx.filter_pos = 1;
+
+	return squat_uidlist_get_ctx(&ctx, uid_list_idx);
+
+	range = array_get(ctx.result, &count);
+	if (count > 0) {
+		for (; ctx.filter_pos <= range[count-1].seq2; ctx.filter_pos++)
+			seq_range_array_remove(result, ctx.filter_pos);
+	}
+}
+
+size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
+			      unsigned int *count_r)
+{
+	*count_r = uidlist->hdr.node_count;
+
+	return uidlist->hdr.used_file_size;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plugins/fts-squat/squat-uidlist.h	Fri Dec 01 23:02:10 2006 +0200
@@ -0,0 +1,53 @@
+#ifndef __SQUAT_UIDLIST_H
+#define __SQUAT_UIDLIST_H
+
+#include "seq-range-array.h"
+
+struct squat_trie;
+struct squat_uidlist;
+
+struct squat_uidlist *
+squat_uidlist_init(struct squat_trie *trie, const char *path);
+void squat_uidlist_deinit(struct squat_uidlist *uidlist);
+
+/* Add new UID to given UID list. The uid_list_idx is updated to contain the
+   new list index. It must be put through _finish_list() before it's actually
+   written to disk. */
+int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *uid_list_idx,
+		      uint32_t uid);
+/* Write UID list into disk. The uid_list_idx is updated to contain the new
+   permanent index for it. */
+int squat_uidlist_finish_list(struct squat_uidlist *uidlist,
+			      uint32_t *uid_list_idx);
+int squat_uidlist_flush(struct squat_uidlist *uidlist, uint32_t uid_validity);
+/* Returns TRUE if uidlist should be compressed. current_message_count can be
+   (unsigned int)-1 if you don't want include it in the check. */
+bool squat_uidlist_need_compress(struct squat_uidlist *uidlist,
+				 unsigned int current_message_count);
+/* Mark the uidlist containing expunged messages. update_disk=FALSE should be
+   done when the uidlist is going to be compressed and this function only tells
+   the compression to check for the expunged messages. */
+int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist,
+				       bool update_disk);
+
+/* Compress the uidlist file. existing_uids may be NULL if they're not known. */
+struct squat_uidlist_compress_ctx *
+squat_uidlist_compress_begin(struct squat_uidlist *uidlist,
+			     const ARRAY_TYPE(seq_range) *existing_uids);
+int squat_uidlist_compress_next(struct squat_uidlist_compress_ctx *ctx,
+				uint32_t *uid_list_idx);
+void squat_uidlist_compress_rollback(struct squat_uidlist_compress_ctx **ctx);
+int squat_uidlist_compress_commit(struct squat_uidlist_compress_ctx **ctx);
+
+/* Returns UIDs for a given UID list index. */
+int squat_uidlist_get(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
+		      ARRAY_TYPE(seq_range) *result);
+/* Filter out UIDs which don't appear in the given UID list from the given
+   result array */
+int squat_uidlist_filter(struct squat_uidlist *uidlist, uint32_t uid_list_idx,
+			 ARRAY_TYPE(seq_range) *result);
+
+size_t squat_uidlist_mem_used(struct squat_uidlist *uidlist,
+			      unsigned int *count_r);
+
+#endif