view src/plugins/fts-squat/squat-trie.c @ 6418:46d9ee79f292 HEAD

Removed _ prefix from all public APIs.
author Timo Sirainen <tss@iki.fi>
date Sun, 16 Sep 2007 12:43:21 +0300
parents a6a49d5efc59
children 65c69a53a7be
line wrap: on
line source

/* Copyright (C) 2006 Timo Sirainen */

#include "lib.h"
#include "array.h"
#include "bsearch-insert-pos.h"
#include "file-cache.h"
#include "file-lock.h"
#include "istream.h"
#include "ostream.h"
#include "read-full.h"
#include "write-full.h"
#include "mmap-util.h"
#include "unichar.h"
#include "squat-uidlist.h"
#include "squat-trie.h"
#include "squat-trie-private.h"

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>

/* 8bit character counter holds only 255, so we can't use 256. */
#define MAX_8BIT_CHAR_COUNT 255

#define FAST_8BIT_LEVEL 2

#define TRIE_COMPRESS_PERCENTAGE 30
#define TRIE_COMPRESS_MIN_SIZE (1024*50)

#define SQUAT_TRIE_VERSION 1
#define SQUAT_TRIE_LOCK_TIMEOUT 60

/* for non-x86 use memcpy() when accessing unaligned int* addresses */
#if defined(__i386__) || defined(__x86_64__)
#  define ALLOW_UNALIGNED_ACCESS
#endif

#define BLOCK_SIZE 4

#define ALIGN(size) \
	(((size) + sizeof(void *)-1) & ~((unsigned int) sizeof(void *)-1))

struct squat_trie {
	char *filepath;
	int fd;
	dev_t dev;
	ino_t ino;

	enum file_lock_method lock_method;
	struct file_lock *file_lock;
	int lock_count;
	int lock_type; /* F_RDLCK / F_WRLCK */

	struct file_cache *file_cache;
	uint32_t file_cache_modify_counter;

	void *mmap_base; /* NULL with mmap_disable=yes */
	const uint8_t *const_mmap_base;
	size_t mmap_size;

	const struct squat_trie_header *hdr;
	uint32_t uidvalidity;

	char *uidlist_filepath;
	struct squat_uidlist *uidlist;
	struct trie_node *root;
	buffer_t *buf;

	unsigned int corrupted:1;
	unsigned int mmap_disable:1;
};

struct squat_trie_build_context {
	struct squat_trie *trie;

	struct ostream *output;

	uint32_t prev_uid;
	unsigned int prev_added_size;
	uint16_t prev_added[BLOCK_SIZE-1];

	unsigned int node_count;
	unsigned int deleted_space;

	unsigned int modified:1;
	unsigned int failed:1;
	unsigned int locked:1;
};

struct squat_trie_compress_context {
	struct squat_trie *trie;

	const char *tmp_path;
	struct ostream *output;
	int fd;

	struct squat_uidlist_compress_ctx *uidlist_ctx;

	unsigned int node_count;
};

struct trie_node {
	/* new characters have been added to this node */
	uint8_t resized:1;
	/* idx pointers have been updated */
	uint8_t modified:1;
	uint8_t chars_8bit_count;
	uint16_t chars_16bit_count;

	uint32_t file_offset;
	uint32_t orig_size;

	/* the node pointers are valid as long as their lowest bit is 0,
	   otherwise they're offsets to the trie file (>> 1).

	   in leaf nodes the children pointers are uint32_t uid_list_idx[]; */
	/* uint8_t 8bit_chars[chars_8bit_count]; */
	/* struct trie_node *children[chars_8bit_count]; */
	/* uint16_t 16bit_chars[chars_16bit_count]; */
	/* struct trie_node *children[chars_16bit_count]; */
};
#define NODE_CHARS8(node) \
	(uint8_t *)(node + 1)
#define NODE_CHILDREN8(node) \
	(struct trie_node **) \
		((char *)((node) + 1) + \
		 ALIGN(sizeof(uint8_t) * ((node)->chars_8bit_count)))
#define NODE_CHARS16(node, level) \
	(uint16_t *)((char *)NODE_CHILDREN8(node) + \
		((node)->chars_8bit_count) * \
		((level) == BLOCK_SIZE ? \
		 sizeof(uint32_t) : sizeof(struct trie_node *)))
#define NODE_CHILDREN16(node, level) \
	(struct trie_node **) \
		((char *)NODE_CHARS16(node, level) + \
		 ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count)))

static void free_node(struct trie_node *node, unsigned int level);
static void squat_trie_compress_chars8(struct trie_node *node);
static int
squat_trie_compress_node(struct squat_trie_compress_context *ctx,
			 struct trie_node *node, unsigned int level);
static int trie_write_node(struct squat_trie_build_context *ctx,
			   unsigned int level, struct trie_node *node);
static int
squat_trie_build_flush(struct squat_trie_build_context *ctx, bool finish);

static int chr_8bit_cmp(const void *_key, const void *_chr)
{
	const uint16_t *key = _key;
	const uint8_t *chr = _chr;

	return *key - *chr;
}

static int chr_16bit_cmp(const void *_key, const void *_chr)
{
	const uint16_t *key = _key, *chr = _chr;

	return *key - *chr;
}

void squat_trie_pack_num(buffer_t *buffer, uint32_t num)
{
	uint8_t c;

	/* number continues as long as the highest bit is set */
	while (num >= 0x80) {
		c = (num & 0x7f) | 0x80;
		num >>= 7;

		buffer_append(buffer, &c, 1);
	}

	c = num;
	buffer_append(buffer, &c, 1);
}

uint32_t squat_trie_unpack_num(const uint8_t **p, const uint8_t *end)
{
	const uint8_t *c = *p;
	uint32_t value = 0;
	unsigned int bits = 0;

	while (c != end && *c >= 0x80) {
		value |= (*c & 0x7f) << bits;
		bits += 7;
		c++;
	}

	if (c == end) {
		/* last number shouldn't end with high bit */
		return 0;
	}
	if (bits > 32-7) {
		/* we have only 32bit numbers */
		return 0;
	}

	value |= (*c & 0x7f) << bits;
	*p = c + 1;
	return value;
}

static const uint16_t *
data_normalize(const void *data, size_t size, buffer_t *dest)
{
	const unsigned char *src = data;
	size_t i;

	buffer_set_used_size(dest, 0);
	for (i = 0; i < size; i++) {
		uint16_t chr;

		if (src[i] <= 32)
			chr = 0;
		else if (src[i] <= 'z')
			chr = i_toupper(src[i]) - 32;
		else if (src[i] < 128)
			chr = src[i] - 32 - 26;
		else {
			/* UTF-8 input */
			unichar_t uchr;

			/* FIXME: can we do anything better than just
			   truncate with >16bit values? */
			if (uni_utf8_get_char_n(src+i, size-i, &uchr) <= 0)
				chr = 0;
			else {
				uchr -= 32 - 26;
				chr = uchr < (uint16_t)-1 ? uchr : 0;
			}
			i += uni_utf8_char_bytes(src[i]) - 1;
		}
		buffer_append(dest, &chr, sizeof(chr));
	}

	return dest->data;
}

static void
squat_trie_set_syscall_error(struct squat_trie *trie, const char *function)
{
	i_error("%s failed with index search file %s: %m",
		function, trie->filepath);
}

void squat_trie_set_corrupted(struct squat_trie *trie, const char *reason)
{
	i_error("Corrupted index search file %s: %s", trie->filepath, reason);

	(void)unlink(trie->filepath);
	(void)unlink(trie->uidlist_filepath);
	trie->corrupted = TRUE;
}

static void
trie_map_node_save_leaf(const uint32_t *src_idx, unsigned int count,
			uint32_t *children)
{
	unsigned int i;

#ifndef ALLOW_UNALIGNED_ACCESS
	if ((POINTER_CAST_TO(src_idx, size_t) & (sizeof(uint32_t)-1)) == 0) {
#endif
		for (i = 0; i < count; i++)
			children[i] = src_idx[i];
#ifndef ALLOW_UNALIGNED_ACCESS
	} else {
		/* unaligned access */
		const uint8_t *src_idx8 = (const uint8_t *)src_idx;

		for (i = 0; i < count; i++) {
			memcpy(&children[i], src_idx8 + i * sizeof(uint32_t),
			       sizeof(children[i]));
		}
	}
#endif
}

static void
trie_map_node_save_children(unsigned int level, const uint32_t *src_idx,
			    unsigned int count, struct trie_node **children)
{
	unsigned int i;

	if (level == BLOCK_SIZE) {
		trie_map_node_save_leaf(src_idx, count, (uint32_t *)children);
		return;
	}

#ifndef ALLOW_UNALIGNED_ACCESS
	if ((POINTER_CAST_TO(src_idx, size_t) & (sizeof(uint32_t)-1)) == 0) {
#endif
		for (i = 0; i < count; i++) {
			children[i] = src_idx[i] == 0 ? NULL :
				POINTER_CAST(src_idx[i] | 1);
		}
#ifndef ALLOW_UNALIGNED_ACCESS
	} else {
		/* unaligned access */
		const uint8_t *src_idx8 = (const uint8_t *)src_idx;
		uint32_t idx;

		for (i = 0; i < count; i++) {
			memcpy(&idx, src_idx8 + i * sizeof(uint32_t),
			       sizeof(idx));
			children[i] = idx == 0 ? NULL : POINTER_CAST(idx | 1);
		}
	}
#endif
}

static int trie_map_area(struct squat_trie *trie, uoff_t offset, size_t len)
{
	ssize_t ret;

	if (trie->file_cache == NULL)
		return 0;

	ret = file_cache_read(trie->file_cache, offset, len);
	if (ret < 0) {
		squat_trie_set_syscall_error(trie, "file_cache_read()");
		return -1;
	}
	trie->const_mmap_base =
		file_cache_get_map(trie->file_cache, &trie->mmap_size);
	trie->hdr = (const void *)trie->const_mmap_base;
	return 0;
}

static void
trie_map_fix_fast_node(struct trie_node *node, unsigned int chars8_count)
{
	uint8_t *chars = NODE_CHARS8(node);
	struct trie_node **children = NODE_CHILDREN8(node);
	int i, j;

	i_assert(node->chars_8bit_count == MAX_8BIT_CHAR_COUNT);

	j = chars8_count - 1;
	for (i = node->chars_8bit_count - 1; i >= 0; i--) {
		if (j >= 0 && i == chars[j])
			children[i] = children[j--];
		else
			children[i] = NULL;
		chars[i] = i;
	}
}

static int
trie_map_node(struct squat_trie *trie, uint32_t offset, unsigned int level,
	      struct trie_node **node_r)
{
	struct trie_node *node;
	const uint8_t *p, *end, *chars8_src, *chars16_src;
	uint32_t num, chars8_count, chars16_count;
	unsigned int chars8_offset, chars8_size, chars8_memsize;
	unsigned int chars16_offset, chars16_size, chars16_memsize;
	unsigned int idx_size, alloced_chars8_count;

	i_assert(trie->fd != -1);

	if (trie_map_area(trie, offset, 2+256) < 0)
		return -1;

	if (offset >= trie->mmap_size) {
		squat_trie_set_corrupted(trie, "trie offset too large");
		return -1;
	}

	p = trie->const_mmap_base + offset;
	end = trie->const_mmap_base + trie->mmap_size;

	/* get 8bit char count and check that it's valid */
	num = squat_trie_unpack_num(&p, end);
	chars8_count = num >> 1;

	chars8_offset = p - trie->const_mmap_base;
	chars8_size = chars8_count * (sizeof(uint8_t) + sizeof(uint32_t));

	if (trie_map_area(trie, chars8_offset, chars8_size + 8) < 0)
		return -1;

	if (chars8_count > MAX_8BIT_CHAR_COUNT ||
	    chars8_offset + chars8_size > trie->mmap_size) {
		squat_trie_set_corrupted(trie, "trie offset broken");
		return -1;
	}

	idx_size = level == BLOCK_SIZE ?
		sizeof(uint32_t) : sizeof(struct trie_node *);

	alloced_chars8_count = level <= FAST_8BIT_LEVEL ?
		MAX_8BIT_CHAR_COUNT : chars8_count;
	chars8_memsize = ALIGN(alloced_chars8_count * sizeof(uint8_t)) +
		alloced_chars8_count * idx_size;

	if ((num & 1) == 0) {
		/* no 16bit chars */
		chars16_count = 0;
		chars16_memsize = 0;
		chars16_offset = 0;
	} else {
		/* get the 16bit char count */
		p = trie->const_mmap_base + chars8_offset + chars8_size;
		end = trie->const_mmap_base + trie->mmap_size;

		chars16_count = squat_trie_unpack_num(&p, end);
		if (chars16_count > 65536) {
			squat_trie_set_corrupted(trie, "trie offset broken");
			return -1;
		}
		chars16_offset = p - trie->const_mmap_base;

		/* map the required area size and make sure it exists */
		chars16_size = chars16_count *
			(sizeof(uint16_t) + sizeof(uint32_t));
		if (trie_map_area(trie, chars16_offset, chars16_size) < 0)
			return -1;

		if (chars16_offset + chars16_size > trie->mmap_size) {
			squat_trie_set_corrupted(trie, "trie offset broken");
			return -1;
		}

		chars16_memsize = ALIGN(chars16_count * sizeof(uint16_t)) +
			chars16_count * idx_size;
	}

	node = i_malloc(sizeof(*node) + chars8_memsize + chars16_memsize);
	node->chars_8bit_count = alloced_chars8_count;
	node->chars_16bit_count = chars16_count;
	node->file_offset = offset;

	{
		uint8_t *chars8 = NODE_CHARS8(node);
		uint16_t *chars16 = NODE_CHARS16(node, level);
		struct trie_node **children8 = NODE_CHILDREN8(node);
		struct trie_node **children16 = NODE_CHILDREN16(node, level);
		const uint32_t *src_idx;
		const void *end_offset;

		chars8_src = trie->const_mmap_base + chars8_offset;
		chars16_src = trie->const_mmap_base + chars16_offset;

		memcpy(chars8, chars8_src, sizeof(uint8_t) * chars8_count);
		memcpy(chars16, chars16_src, sizeof(uint16_t) * chars16_count);

		src_idx = CONST_PTR_OFFSET(chars8_src, chars8_count);
		trie_map_node_save_children(level, src_idx, chars8_count,
					    children8);

		if (alloced_chars8_count != chars8_count)
			trie_map_fix_fast_node(node, chars8_count);
		if (chars16_count == 0)
			end_offset = &src_idx[chars8_count];
		else {
			src_idx = CONST_PTR_OFFSET(chars16_src,
						   chars16_count *
						   sizeof(uint16_t));
			trie_map_node_save_children(level, src_idx,
						    chars16_count, children16);
			end_offset = &src_idx[chars16_count];
		}

		node->orig_size = ((const uint8_t *)end_offset -
				   trie->const_mmap_base) - offset;
	}

	*node_r = node;
	return 0;
}

static void free_children(unsigned int level, struct trie_node **children,
			  unsigned int count)
{
	unsigned int i;
	uint32_t child_idx;

	for (i = 0; i < count; i++) {
		child_idx = POINTER_CAST_TO(children[i], size_t);
		if ((child_idx & 1) == 0 && children[i] != NULL)
			free_node(children[i], level);
	}
}

static void free_node(struct trie_node *node, unsigned int level)
{
	if (level < BLOCK_SIZE) {
		struct trie_node **children8 = NODE_CHILDREN8(node);
		struct trie_node **children16 = NODE_CHILDREN16(node, level);

		free_children(level + 1, children8, node->chars_8bit_count);
		free_children(level + 1, children16, node->chars_16bit_count);
	}
	i_free(node);
}

static void squat_trie_unmap(struct squat_trie *trie)
{
	if (trie->file_cache != NULL)
		file_cache_invalidate(trie->file_cache, 0, (uoff_t)-1);

	if (trie->mmap_base != NULL) {
		if (munmap(trie->mmap_base, trie->mmap_size) < 0)
			squat_trie_set_syscall_error(trie, "munmap()");
		trie->mmap_base = NULL;
	}

	trie->mmap_size = 0;
	trie->hdr = NULL;
	trie->const_mmap_base = NULL;

	if (trie->root != NULL) {
		free_node(trie->root, 1);
		trie->root = NULL;
	}
}

static void trie_file_close(struct squat_trie *trie)
{
	if (trie->file_cache != NULL)
		file_cache_free(&trie->file_cache);
	if (trie->file_lock != NULL)
		file_lock_free(&trie->file_lock);

	squat_trie_unmap(trie);
	if (trie->fd != -1) {
		if (close(trie->fd) < 0)
			squat_trie_set_syscall_error(trie, "close()");
		trie->fd = -1;
	}

	trie->hdr = NULL;
	trie->corrupted = FALSE;
}

static int
trie_map_check_header(struct squat_trie *trie,
		      const struct squat_trie_header *hdr, uoff_t file_size)
{
	if (hdr->version != SQUAT_TRIE_VERSION)
		return -1;

	if (hdr->used_file_size > file_size) {
		squat_trie_set_corrupted(trie, "used_file_size too large");
		return -1;
	}
	if (hdr->root_offset != 0 &&
	    (hdr->root_offset > file_size ||
	     hdr->root_offset < sizeof(*hdr))) {
		squat_trie_set_corrupted(trie, "invalid root_offset");
		return -1;
	}
	if (hdr->uidvalidity != trie->uidvalidity) {
		squat_trie_set_corrupted(trie, "uidvalidity changed");
		return -1;
	}

	return 0;
}

static int squat_trie_file_was_modified(struct squat_trie *trie)
{
	struct squat_trie_header hdr;
	int ret;

	ret = pread_full(trie->fd, &hdr.modify_counter,
			 sizeof(hdr.modify_counter),
			 offsetof(struct squat_trie_header, modify_counter));
	if (ret < 0) {
		squat_trie_set_syscall_error(trie, "pread_full()");
		return -1;
	}
	if (ret == 0) {
		/* broken file, treat as modified */
		return 1;
	}
	return hdr.modify_counter == trie->file_cache_modify_counter ? 0 : 1;
}

static int squat_trie_map(struct squat_trie *trie)
{
	const struct squat_trie_header *hdr;
	struct stat st;
	ssize_t ret;

	if (trie->hdr != NULL) {
		if (!trie->mmap_disable) {
			if (trie->hdr->used_file_size <= trie->mmap_size) {
				/* everything is already mapped */
				return 1;
			}
		} else {
			ret = squat_trie_file_was_modified(trie);
			if (ret <= 0)
				return ret < 0 ? -1 : 1;
		}
	}

	if (fstat(trie->fd, &st) < 0) {
		squat_trie_set_syscall_error(trie, "fstat()");
		return -1;
	}
	trie->dev = st.st_dev;
	trie->ino = st.st_ino;

	squat_trie_unmap(trie);

	if (!trie->mmap_disable) {
		trie->mmap_size = st.st_size;
		trie->mmap_base = mmap(NULL, trie->mmap_size,
				       PROT_READ | PROT_WRITE,
				       MAP_SHARED, trie->fd, 0);
		if (trie->mmap_base == MAP_FAILED) {
			trie->mmap_size = 0;
			trie->mmap_base = NULL;
			squat_trie_set_syscall_error(trie, "mmap()");
			return -1;
		}
		trie->const_mmap_base = trie->mmap_base;
	} else {
		ret = file_cache_read(trie->file_cache, 0, sizeof(*trie->hdr));
		if (ret < 0) {
			squat_trie_set_syscall_error(trie, "file_cache_read()");
			return -1;
		}
		if ((size_t)ret < sizeof(*trie->hdr)) {
			squat_trie_set_corrupted(trie, "file too small");
			return -1;
		}
		trie->const_mmap_base =
			file_cache_get_map(trie->file_cache, &trie->mmap_size);
	}

	hdr = (const void *)trie->const_mmap_base;
	if (trie_map_check_header(trie, hdr, st.st_size) < 0)
		return -1;
	trie->hdr = hdr;
	trie->file_cache_modify_counter = trie->hdr->modify_counter;

	if (trie->hdr->root_offset != 0) {
		if (trie_map_node(trie, trie->hdr->root_offset,
				  1, &trie->root) < 0)
			return 0;
	}
	return 1;
}

static void trie_file_open_fd(struct squat_trie *trie, int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0) {
		/* don't bother adding complexity by trying to handle this
		   error here. we'll break later anyway in easier error
		   handling paths. */
		squat_trie_set_syscall_error(trie, "fstat()");
		trie->ino = 0;
	} else {
		trie->dev = st.st_dev;
		trie->ino = st.st_ino;
	}
	trie->fd = fd;

	if (trie->mmap_disable)
		trie->file_cache = file_cache_new(trie->fd);
}

static int trie_file_open(struct squat_trie *trie, bool create)
{
	int fd;

	i_assert(trie->fd == -1);

	fd = open(trie->filepath, O_RDWR | (create ? O_CREAT : 0), 0660);
	if (fd == -1) {
		if (errno == ENOENT)
			return 0;

		squat_trie_set_syscall_error(trie, "open()");
		return -1;
	}
	trie_file_open_fd(trie, fd);
	return 1;
}

static int trie_file_create_finish(struct squat_trie *trie)
{
	struct squat_trie_header hdr;
	struct stat st;

	if (fstat(trie->fd, &st) < 0) {
		squat_trie_set_syscall_error(trie, "fstat()");
		return -1;
	}

	if (st.st_size <= (off_t)sizeof(hdr)) {
		memset(&hdr, 0, sizeof(hdr));
		hdr.version = SQUAT_TRIE_VERSION;
		hdr.uidvalidity = trie->uidvalidity;
		hdr.used_file_size = sizeof(hdr);

		if (pwrite_full(trie->fd, &hdr, sizeof(hdr), 0) < 0) {
			squat_trie_set_syscall_error(trie, "pwrite_full()");
			return -1;
		}
	}

	return 0;
}

struct squat_trie *
squat_trie_open(const char *path, uint32_t uidvalidity,
		enum file_lock_method lock_method, bool mmap_disable)
{
	struct squat_trie *trie;

	trie = i_new(struct squat_trie, 1);
	trie->fd = -1;
	trie->filepath = i_strdup(path);
	trie->uidvalidity = uidvalidity;
	trie->lock_method = lock_method;
	trie->mmap_disable = mmap_disable;
	trie->buf = buffer_create_dynamic(default_pool, 1024);

	trie->uidlist_filepath = i_strconcat(path, ".uids", NULL);
	trie->uidlist =
		squat_uidlist_init(trie, trie->uidlist_filepath,
				   uidvalidity, mmap_disable);
	return trie;
}

void squat_trie_close(struct squat_trie *trie)
{
	squat_trie_unmap(trie);
	buffer_free(&trie->buf);
	squat_uidlist_deinit(trie->uidlist);
	i_free(trie->uidlist_filepath);
	i_free(trie->filepath);
	i_free(trie);
}

int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *uid_r)
{
	int ret;

	if (trie->fd == -1) {
		if ((ret = trie_file_open(trie, FALSE)) < 0)
			return ret;
		if (ret == 0) {
			*uid_r = 0;
			return 0;
		}
	}

	if (squat_trie_lock(trie, F_RDLCK) <= 0)
		return -1;

	ret = squat_uidlist_get_last_uid(trie->uidlist, uid_r);
	squat_trie_unlock(trie);
	return ret;
}

static int squat_trie_is_file_stale(struct squat_trie *trie)
{
	struct stat st;

	if (stat(trie->filepath, &st) < 0) {
		if (errno == ENOENT)
			return 1;

		squat_trie_set_syscall_error(trie, "stat()");
		return -1;
	}

	return st.st_ino == trie->ino &&
		CMP_DEV_T(st.st_dev, trie->dev) ? 0 : 1;
}

static int
squat_trie_file_lock(struct squat_trie *trie, int fd, const char *path,
		     int lock_type, struct file_lock **lock_r)
{
	int ret;

	ret = file_wait_lock(fd, path, lock_type, trie->lock_method,
			     SQUAT_TRIE_LOCK_TIMEOUT, lock_r);
	if (ret == 0)
		squat_trie_set_syscall_error(trie, "file_wait_lock()");
	return ret;
}

int squat_trie_lock(struct squat_trie *trie, int lock_type)
{
	bool created = FALSE;
	int ret;

	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);

	if (trie->lock_count > 0) {
		/* read lock -> write lock would deadlock */
		i_assert(trie->lock_type == lock_type || lock_type == F_RDLCK);

		trie->lock_count++;
		return 1;
	}

	if (trie->fd == -1 || trie->corrupted) {
		trie_file_close(trie);
		if (lock_type == F_WRLCK) {
			if ((ret = trie_file_open(trie, FALSE)) < 0)
				return -1;
			if (ret == 0) {
				if (trie_file_open(trie, TRUE) < 0)
					return -1;
				created = TRUE;
			}
		} else {
			if (trie_file_open(trie, FALSE) <= 0)
				return -1;
		}
	}

	for (;;) {
		i_assert(trie->file_lock == NULL);
		ret = squat_trie_file_lock(trie, trie->fd, trie->filepath,
					   lock_type, &trie->file_lock);
		if (ret <= 0)
			return ret;

		/* if the trie has been compressed, we need to reopen the
		   file and try to lock again */
		ret = squat_trie_is_file_stale(trie);
		if (ret == 0)
			break;

		file_unlock(&trie->file_lock);
		if (ret < 0)
			return -1;

		trie_file_close(trie);
		if (trie_file_open(trie, FALSE) <= 0)
			return -1;
	}

	if (created) {
		/* we possibly created this file. now that we've locked the
		   file, we can safely check if someone else already wrote the
		   header or if we should do it now */
		if (trie_file_create_finish(trie) < 0) {
			file_unlock(&trie->file_lock);
			return -1;
		}
	}

	if (squat_trie_map(trie) <= 0) {
		file_unlock(&trie->file_lock);
		return -1;
	}
	if (squat_uidlist_refresh(trie->uidlist) < 0) {
		file_unlock(&trie->file_lock);
		return -1;
	}

	trie->lock_count++;
	trie->lock_type = lock_type;
	return 1;
}

void squat_trie_unlock(struct squat_trie *trie)
{
	i_assert(trie->lock_count > 0);

	if (--trie->lock_count > 0)
		return;

	file_unlock(&trie->file_lock);
}

static struct trie_node *
node_alloc(uint16_t chr, unsigned int level)
{
	struct trie_node *node;
	unsigned int i, idx_size, idx_offset = sizeof(*node);

	idx_size = level < BLOCK_SIZE ?
		sizeof(struct trie_node *) : sizeof(uint32_t);

	if (level <= FAST_8BIT_LEVEL) {
		uint8_t *chars;
		unsigned int chars16_count = chr >= MAX_8BIT_CHAR_COUNT ? 1 : 0;

		node = i_malloc(sizeof(*node) +
				ALIGN(MAX_8BIT_CHAR_COUNT) +
				ALIGN(sizeof(uint16_t) * chars16_count) +
				(MAX_8BIT_CHAR_COUNT + chars16_count) *
				idx_size);
		node->chars_8bit_count = MAX_8BIT_CHAR_COUNT;

		chars = NODE_CHARS8(node);
		for (i = 0; i < MAX_8BIT_CHAR_COUNT; i++)
			chars[i] = i;

		if (chars16_count > 0) {
			uint16_t *chars16 = NODE_CHARS16(node, 0);

			node->chars_16bit_count = chars16_count;
			chars16[0] = chr;
		}
	} else if (chr < MAX_8BIT_CHAR_COUNT) {
		uint8_t *chrp;

		idx_offset += ALIGN(sizeof(*chrp));
		node = i_malloc(idx_offset + idx_size);
		node->chars_8bit_count = 1;

		chrp = PTR_OFFSET(node, sizeof(*node));
		*chrp = chr;
	} else {
		uint16_t *chrp;

		idx_offset += ALIGN(sizeof(*chrp));
		node = i_malloc(idx_offset + idx_size);
		node->chars_16bit_count = 1;

		chrp = PTR_OFFSET(node, sizeof(*node));
		*chrp = chr;
	}

	node->modified = TRUE;
	node->resized = TRUE;
	return node;
}

static struct trie_node *
node_realloc(struct trie_node *node, uint32_t char_idx, uint16_t chr,
	     unsigned int level)
{
	struct trie_node *new_node;
	unsigned int old_size_8bit, old_size_16bit, old_idx_offset;
	unsigned int idx_size, old_size, new_size, new_idx_offset;
	unsigned int hole1_pos, hole2_pos, skip;

	idx_size = level < BLOCK_SIZE ?
		sizeof(struct trie_node *) : sizeof(uint32_t);

	old_size_8bit = ALIGN(node->chars_8bit_count) +
		node->chars_8bit_count * idx_size;
	old_size_16bit = ALIGN(sizeof(uint16_t) * node->chars_16bit_count) +
		node->chars_16bit_count * idx_size;
	old_size = sizeof(*node) + old_size_8bit + old_size_16bit;

	if (chr < MAX_8BIT_CHAR_COUNT) {
		new_idx_offset = sizeof(*node) +
			ALIGN(node->chars_8bit_count + 1);
		new_size = new_idx_offset + old_size_16bit +
			(node->chars_8bit_count + 1) * idx_size;
	} else {
		new_idx_offset = sizeof(*node) + old_size_8bit +
			ALIGN((node->chars_16bit_count + 1) * sizeof(uint16_t));
		new_size = new_idx_offset +
			(node->chars_16bit_count + 1) * idx_size;
	}

	new_node = t_buffer_get(new_size);
	if (chr < MAX_8BIT_CHAR_COUNT) {
		hole1_pos = sizeof(*node) + char_idx;
		old_idx_offset = sizeof(*node) + ALIGN(node->chars_8bit_count);
	} else {
		hole1_pos = sizeof(*node) + old_size_8bit +
			char_idx * sizeof(uint16_t);
		old_idx_offset = sizeof(*node) + old_size_8bit +
			ALIGN(node->chars_16bit_count * sizeof(uint16_t));
	}
	hole2_pos = old_idx_offset + idx_size * char_idx;

	/* 0..character position */
	memcpy(new_node, node, hole1_pos);
	if (chr < MAX_8BIT_CHAR_COUNT) {
		uint8_t *chrp = PTR_OFFSET(new_node, hole1_pos);
		*chrp = chr;
		new_node->chars_8bit_count++;

		/* rest of the characters */
		memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint8_t)),
		       PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
	} else {
		uint16_t *chrp = PTR_OFFSET(new_node, hole1_pos);
		*chrp = chr;
		new_node->chars_16bit_count++;

		/* rest of the characters */
		memcpy(PTR_OFFSET(new_node, hole1_pos + sizeof(uint16_t)),
		       PTR_OFFSET(node, hole1_pos), old_idx_offset - hole1_pos);
	}

	/* indexes from 0 to character position */
	memcpy(PTR_OFFSET(new_node, new_idx_offset),
	       PTR_OFFSET(node, old_idx_offset),
	       hole2_pos - old_idx_offset);

	/* zero the inserted character index */
	skip = char_idx * idx_size;
	memset(PTR_OFFSET(new_node, new_idx_offset + skip), 0, idx_size);

	/* rest of the indexes */
	skip += idx_size;
	memcpy(PTR_OFFSET(new_node, new_idx_offset + skip),
	       PTR_OFFSET(node, hole2_pos),
	       old_size - hole2_pos);

	new_node->resized = TRUE;

	node = i_realloc(node, 0, new_size);
	memcpy(node, new_node, new_size);
	return node;
}

static int
trie_insert_node(struct squat_trie_build_context *ctx,
		 struct trie_node **parent,
		 const uint16_t *data, uint32_t uid, unsigned int level)
{
	struct squat_trie *trie = ctx->trie;
	struct trie_node *node = *parent;
	struct trie_node **children;
	uint32_t char_idx;
	bool match, modified = FALSE;
	int ret;

	if (*data < MAX_8BIT_CHAR_COUNT) {
		unsigned int count;

		if (node == NULL) {
			ctx->node_count++;
			node = *parent = node_alloc(*data, level);
			char_idx = level <= FAST_8BIT_LEVEL ? *data : 0;
			modified = TRUE;
		} else if (level <= FAST_8BIT_LEVEL) {
			char_idx = *data;
		} else {
			uint8_t *chars = NODE_CHARS8(node);

			count = node->chars_8bit_count;
			match = bsearch_insert_pos(data, chars, count,
						   sizeof(chars[0]),
						   chr_8bit_cmp,
						   &char_idx);
			if (!match) {
				node = node_realloc(node, char_idx,
						    *data, level);
				*parent = node;
				modified = TRUE;
			}
		}
		children = NODE_CHILDREN8(node);
	} else {
		unsigned int offset = sizeof(*node);
		unsigned int count;

		if (node == NULL) {
			ctx->node_count++;
			node = *parent = node_alloc(*data, level);
			char_idx = 0;
			modified = TRUE;
		} else {
			unsigned int idx_size;
			uint16_t *chars;

			idx_size = level < BLOCK_SIZE ?
				sizeof(struct trie_node *) : sizeof(uint32_t);
			offset += ALIGN(node->chars_8bit_count) +
				idx_size * node->chars_8bit_count;
			chars = PTR_OFFSET(node, offset);

			count = node->chars_16bit_count;
			match = bsearch_insert_pos(data, chars, count,
						   sizeof(chars[0]),
						   chr_16bit_cmp,
						   &char_idx);
			if (!match) {
				node = node_realloc(node, char_idx,
						    *data, level);
				*parent = node;
				modified = TRUE;
			}
		}

		children = NODE_CHILDREN16(node, level);
	}

	if (level < BLOCK_SIZE) {
		size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);

		if ((child_idx & 1) != 0) {
			if (trie_map_node(trie, child_idx & ~1, level + 1,
					  &children[char_idx]) < 0)
				return -1;
		}

		if (children[char_idx] == NULL)
			node->resized = TRUE;

		ret = trie_insert_node(ctx, &children[char_idx],
				       data + 1, uid, level + 1);
		if (ret < 0)
			return -1;
		if (ret > 0)
			node->modified = TRUE;
	} else {
		uint32_t *uid_lists = (uint32_t *)children;

		if (uid_lists[char_idx] == 0)
			node->resized = TRUE;

		if (squat_uidlist_add(trie->uidlist, &uid_lists[char_idx],
				      uid) < 0)
			return -1;

		node->modified = TRUE;
	}
	return modified ? 1 : 0;
}

static uint32_t
trie_lookup_node(struct squat_trie *trie, struct trie_node *node,
		 const uint16_t *data, unsigned int level)
{
	struct trie_node **children;
	uint32_t char_idx;

	if (node == NULL)
		return 0;

	if (*data < MAX_8BIT_CHAR_COUNT) {
		if (level <= FAST_8BIT_LEVEL)
			char_idx = *data;
		else {
			const uint8_t *chars, *pos;
			chars = NODE_CHARS8(node);
			pos = bsearch(data, chars, node->chars_8bit_count,
				      sizeof(chars[0]), chr_8bit_cmp);
			if (pos == NULL || *pos != *data)
				return 0;

			char_idx = pos - chars;
		}
		children = NODE_CHILDREN8(node);
	} else {
		const uint16_t *chars, *pos;

		chars = NODE_CHARS16(node, level);
		pos = bsearch(data, chars, node->chars_16bit_count,
			      sizeof(chars[0]), chr_16bit_cmp);
		if (pos == NULL || *pos != *data)
			return 0;

		char_idx = pos - chars;
		children = NODE_CHILDREN16(node, level);
	}

	if (level < BLOCK_SIZE) {
		size_t child_idx = POINTER_CAST_TO(children[char_idx], size_t);

		if ((child_idx & 1) != 0) {
			/* not mapped to memory yet. do it. */
			if (trie_map_node(trie, child_idx & ~1, level + 1,
					  &children[char_idx]) < 0)
				return -1;
		}

		return trie_lookup_node(trie, children[char_idx],
					data + 1, level + 1);
	} else {
		const uint32_t *uid_lists = (const uint32_t *)children;

		return uid_lists[char_idx];
	}
}

static bool block_want_add(const uint16_t *data)
{
	unsigned int i;

	/* skip all blocks that contain spaces or control characters.
	   no-one searches them anyway */
	for (i = 0; i < BLOCK_SIZE; i++) {
		if (data[i] == 0)
			return FALSE;
	}
	return TRUE;
}

struct squat_trie_build_context *
squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r)
{
	struct squat_trie_build_context *ctx;

	ctx = i_new(struct squat_trie_build_context, 1);
	ctx->trie = trie;

	if (squat_trie_lock(trie, F_WRLCK) <= 0)
		ctx->failed = TRUE;
	else {
		ctx->locked = TRUE;
		ctx->node_count = trie->hdr->node_count;

		if (squat_uidlist_get_last_uid(trie->uidlist, last_uid_r) < 0)
			ctx->failed = TRUE;
	}

	if (ctx->failed)
		*last_uid_r = 0;
	return ctx;
}

int squat_trie_build_deinit(struct squat_trie_build_context *ctx)
{
	int ret = ctx->failed ? -1 : 0;

	if (ret == 0)
		ret = squat_trie_build_flush(ctx, TRUE);

	if (ctx->locked)
		squat_trie_unlock(ctx->trie);

	i_free(ctx);
	return ret;
}

int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid,
			  const unsigned char *data, size_t size)
{
	const uint16_t *str;
	uint16_t buf[(BLOCK_SIZE-1)*2];
	unsigned int i, tmp_size, str_len;

	if (ctx->failed)
		return -1;

	t_push();
	str = data_normalize(data, size, ctx->trie->buf);
	str_len = ctx->trie->buf->used / sizeof(*str);

	if (uid == ctx->prev_uid) {
		/* @UNSAFE: continue from last block */
		memcpy(buf, ctx->prev_added,
		       sizeof(buf[0]) * ctx->prev_added_size);
		tmp_size = I_MIN(str_len, BLOCK_SIZE-1);
		memcpy(buf + ctx->prev_added_size, str,
		       sizeof(buf[0]) * tmp_size);

		tmp_size += ctx->prev_added_size;
		for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) {
			if (block_want_add(buf+i)) {
				if (trie_insert_node(ctx,
						     &ctx->trie->root,
						     buf + i, uid, 1) < 0) {
					t_pop();
					return -1;
				}
			}
		}

		if (str_len < BLOCK_SIZE) {
			ctx->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1);
			memcpy(ctx->prev_added, buf + i,
			       sizeof(buf[0]) * ctx->prev_added_size);
			t_pop();
			return 0;
		}
	} else if (squat_uidlist_want_flush(ctx->trie->uidlist)) {
		if (squat_trie_build_flush(ctx, FALSE) < 0) {
			ctx->failed = TRUE;
			t_pop();
			return -1;
		}
		str = data_normalize(data, size, ctx->trie->buf);
		str_len = ctx->trie->buf->used / sizeof(*str);
	}

	ctx->prev_uid = uid;
	for (i = 0; i + BLOCK_SIZE <= str_len; i++) {
		if (block_want_add(str+i)) {
			if (trie_insert_node(ctx, &ctx->trie->root,
					     str + i, uid, 1) < 0) {
				t_pop();
				return -1;
			}
		}
	}
	ctx->prev_added_size = I_MIN(str_len - i, BLOCK_SIZE-1);
	memcpy(ctx->prev_added, str + i,
	       sizeof(ctx->prev_added[0]) * ctx->prev_added_size);

	t_pop();
	return 0;
}

static void node_pack_children(buffer_t *buf, struct trie_node **children,
			       unsigned int count)
{
	unsigned int i;
	size_t child_idx;
	uint32_t idx;

	for (i = 0; i < count; i++) {
		if (children[i] == NULL)
			continue;

		child_idx = POINTER_CAST_TO(children[i], size_t);
		if ((child_idx & 1) != 0)
			idx = child_idx & ~1;
		else
			idx = children[i]->file_offset;
		buffer_append(buf, &idx, sizeof(idx));
	}
}

static void node_pack(buffer_t *buf, struct trie_node *node)
{
	uint8_t *chars8 = NODE_CHARS8(node);
	uint16_t *chars16 = NODE_CHARS16(node, 0);
	struct trie_node **children8 = NODE_CHILDREN8(node);
	struct trie_node **children16 = NODE_CHILDREN16(node, 0);

	buffer_set_used_size(buf, 0);
	squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
			    (node->chars_16bit_count > 0 ? 1 : 0));
	buffer_append(buf, chars8, node->chars_8bit_count);
	node_pack_children(buf, children8, node->chars_8bit_count);

	if (node->chars_16bit_count > 0) {
		squat_trie_pack_num(buf, node->chars_16bit_count);
		buffer_append(buf, chars16,
			      sizeof(*chars16) * node->chars_16bit_count);
		node_pack_children(buf, children16, node->chars_16bit_count);
	}
}

static int node_leaf_finish(struct squat_trie *trie, struct trie_node *node)
{
	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
	unsigned int i;

	for (i = 0; i < node->chars_8bit_count; i++) {
		if (squat_uidlist_finish_list(trie->uidlist, &idx8[i]) < 0)
			return -1;
	}
	for (i = 0; i < node->chars_16bit_count; i++) {
		if (squat_uidlist_finish_list(trie->uidlist, &idx16[i]) < 0)
			return -1;
	}
	return 0;
}

static void node_pack_leaf(buffer_t *buf, struct trie_node *node)
{
	uint8_t *chars8 = NODE_CHARS8(node);
	uint16_t *chars16 = NODE_CHARS16(node, BLOCK_SIZE);
	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);

	buffer_set_used_size(buf, 0);
	squat_trie_pack_num(buf, (node->chars_8bit_count << 1) |
			    (node->chars_16bit_count > 0 ? 1 : 0));
	buffer_append(buf, chars8, node->chars_8bit_count);
	buffer_append(buf, idx8, sizeof(*idx8) * node->chars_8bit_count);

	if (node->chars_16bit_count > 0) {
		squat_trie_pack_num(buf, node->chars_16bit_count);
		buffer_append(buf, chars16,
			      sizeof(*chars16) * node->chars_16bit_count);
		buffer_append(buf, idx16,
			      sizeof(*idx16) * node->chars_16bit_count);
	}
}

static int
trie_write_node_children(struct squat_trie_build_context *ctx,
			 unsigned int level, struct trie_node **children,
			 unsigned int count)
{
	unsigned int i;
	size_t child_idx;

	for (i = 0; i < count; i++) {
		child_idx = POINTER_CAST_TO(children[i], size_t);
		if ((child_idx & 1) == 0 && children[i] != NULL) {
			if (trie_write_node(ctx, level, children[i]) < 0)
				return -1;
		}
	}
	return 0;
}

static int trie_write_node(struct squat_trie_build_context *ctx,
			   unsigned int level, struct trie_node *node)
{
	struct squat_trie *trie = ctx->trie;
	uoff_t offset;

	if (level < BLOCK_SIZE) {
		struct trie_node **children8 = NODE_CHILDREN8(node);
		struct trie_node **children16 = NODE_CHILDREN16(node, level);

		if (trie_write_node_children(ctx, level + 1,
					     children8,
					     node->chars_8bit_count) < 0)
			return -1;
		if (trie_write_node_children(ctx, level + 1,
					     children16,
					     node->chars_16bit_count) < 0)
			return -1;
	}

	if (!node->modified)
		return 0;

	if (level < BLOCK_SIZE) {
		if (level <= FAST_8BIT_LEVEL)
			squat_trie_compress_chars8(node);
		node_pack(trie->buf, node);
	} else {
		if (node_leaf_finish(trie, node) < 0)
			return -1;

		node_pack_leaf(trie->buf, node);
	}

	offset = ctx->output->offset;
	if ((offset & 1) != 0) {
		o_stream_send(ctx->output, "", 1);
		offset++;
	}

	if (node->resized && node->orig_size < trie->buf->used) {
		/* append to end of file. the parent node is written later. */
		node->file_offset = offset;
		o_stream_send(ctx->output, trie->buf->data, trie->buf->used);

		ctx->deleted_space += node->orig_size;
	} else {
		/* overwrite node's contents */
		i_assert(node->file_offset != 0);
		i_assert(trie->buf->used <= node->orig_size);

		/* FIXME: write only the indexes if !node->resized */
		o_stream_seek(ctx->output, node->file_offset);
		o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
		o_stream_seek(ctx->output, offset);

		ctx->deleted_space += trie->buf->used - node->orig_size;
	}

	ctx->modified = TRUE;
	return 0;
}

static int
trie_nodes_write(struct squat_trie_build_context *ctx, uint32_t *uidvalidity_r)
{
	struct squat_trie *trie = ctx->trie;
	struct squat_trie_header hdr;

	hdr = *trie->hdr;
	ctx->output = o_stream_create_fd_file(trie->fd, (uoff_t)-1, FALSE);
	o_stream_seek(ctx->output, hdr.used_file_size);
	o_stream_cork(ctx->output);
	if (hdr.used_file_size == 0) {
		o_stream_send(ctx->output, &hdr, sizeof(hdr));
		ctx->modified = TRUE;
	}

	ctx->deleted_space = 0;
	if (trie_write_node(ctx, 1, trie->root) < 0)
		return -1;

	if (ctx->modified) {
		/* update the header */
		hdr.root_offset = trie->root->file_offset;
		hdr.used_file_size = ctx->output->offset;
		hdr.deleted_space += ctx->deleted_space;
		hdr.node_count = ctx->node_count;
		hdr.modify_counter++;
		o_stream_seek(ctx->output, 0);
		o_stream_send(ctx->output, &hdr, sizeof(hdr));
	}

	o_stream_destroy(&ctx->output);
	*uidvalidity_r = hdr.uidvalidity;
	return 0;
}

static bool squat_trie_need_compress(struct squat_trie *trie,
				     unsigned int current_message_count)
{
	uint32_t max_del_space;

	if (trie->hdr->used_file_size >= TRIE_COMPRESS_MIN_SIZE) {
		/* see if we've reached the max. deleted space in file */
		max_del_space = trie->hdr->used_file_size / 100 *
			TRIE_COMPRESS_PERCENTAGE;
		if (trie->hdr->deleted_space > max_del_space)
			return TRUE;
	}

	return squat_uidlist_need_compress(trie->uidlist,
					   current_message_count);
}

static int
squat_trie_build_flush(struct squat_trie_build_context *ctx, bool finish)
{
	struct squat_trie *trie = ctx->trie;
	uint32_t uidvalidity;

	if (trie->root == NULL) {
		/* nothing changed */
		return 0;
	}

	if (trie->corrupted)
		return -1;

	if (trie_nodes_write(ctx, &uidvalidity) < 0)
		return -1;
	if (squat_uidlist_flush(trie->uidlist, uidvalidity) < 0)
		return -1;

	squat_trie_unmap(trie);
	if (squat_trie_map(trie) <= 0)
		return -1;

	if (squat_trie_need_compress(trie, (unsigned int)-1)) {
		if (ctx->locked && finish) {
			squat_trie_unlock(ctx->trie);
			ctx->locked = FALSE;
		}

		if (squat_trie_compress(trie, NULL) < 0)
			return -1;
	}
	return 0;
}

static void squat_trie_compress_chars8(struct trie_node *node)
{
	uint8_t *chars = NODE_CHARS8(node);
	uint16_t *chars16, *old_chars16 = NODE_CHARS16(node, 0);
	struct trie_node **child_src = NODE_CHILDREN8(node);
	struct trie_node **child_dest;
	unsigned int i, j, old_count;

	old_count = node->chars_8bit_count;
	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != NULL)
			chars[j++] = chars[i];
	}

	node->chars_8bit_count = j;
	child_dest = NODE_CHILDREN8(node);

	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != NULL)
			child_dest[j++] = child_src[i];
	}

	if (node->chars_16bit_count > 0) {
		chars16 = NODE_CHARS16(node, 0);
		memmove(chars16, old_chars16,
			ALIGN(sizeof(*chars16) * node->chars_16bit_count) +
			sizeof(*child_src) * node->chars_16bit_count);
	}
}

static void squat_trie_compress_chars16(struct trie_node *node)
{
	uint16_t *chars = NODE_CHARS16(node, 0);
	struct trie_node **child_src = NODE_CHILDREN16(node, 0);
	struct trie_node **child_dest;
	unsigned int i, j, old_count;

	old_count = node->chars_16bit_count;
	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != NULL)
			chars[j++] = chars[i];
	}

	node->chars_16bit_count = j;
	child_dest = NODE_CHILDREN16(node, 0);

	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != NULL)
			child_dest[j++] = child_src[i];
	}
}

static void squat_trie_compress_leaf_chars8(struct trie_node *node)
{
	uint8_t *chars = NODE_CHARS8(node);
	uint32_t *child_src = (uint32_t *)NODE_CHILDREN8(node);
	uint32_t *child_dest;
	unsigned int i, j, old_count;

	old_count = node->chars_8bit_count;
	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != 0)
			chars[j++] = chars[i];
	}

	node->chars_8bit_count = j;
	child_dest = (uint32_t *)NODE_CHILDREN8(node);

	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != 0)
			child_dest[j++] = child_src[i];
	}
}

static void squat_trie_compress_leaf_chars16(struct trie_node *node)
{
	uint16_t *chars = NODE_CHARS16(node, BLOCK_SIZE);
	uint32_t *child_src = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
	uint32_t *child_dest;
	unsigned int i, j, old_count;

	old_count = node->chars_16bit_count;
	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != 0)
			chars[j++] = chars[i];
	}

	node->chars_16bit_count = j;
	child_dest = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);

	for (i = j = 0; i < old_count; i++) {
		if (child_src[i] != 0)
			child_dest[j++] = child_src[i];
	}
}

static int
squat_trie_compress_children(struct squat_trie_compress_context *ctx,
			     struct trie_node **children, unsigned int count,
			     unsigned int level)
{
	struct trie_node *child_node;
	size_t child_idx;
	unsigned int i;
	int ret = 0;
	bool need_char_compress = FALSE;

	for (i = 0; i < count; i++) {
		if (children[i] == NULL) {
			need_char_compress = TRUE;
			continue;
		}

		child_idx = POINTER_CAST_TO(children[i], size_t);
		i_assert((child_idx & 1) != 0);
		child_idx &= ~1;

		if (trie_map_node(ctx->trie, child_idx, level, &child_node) < 0)
			return -1;

		ret = squat_trie_compress_node(ctx, child_node, level);
		if (child_node->file_offset != 0)
			children[i] = POINTER_CAST(child_node->file_offset | 1);
		else {
			children[i] = NULL;
			need_char_compress = TRUE;
		}
		i_free(child_node);

		if (ret < 0)
			return -1;
	}
	return need_char_compress ? 0 : 1;
}

static int
squat_trie_compress_leaf_uidlist(struct squat_trie_compress_context *ctx,
				 struct trie_node *node)
{
	uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node);
	uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node, BLOCK_SIZE);
	unsigned int i;
	int ret;
	bool compress_chars = FALSE;

	for (i = 0; i < node->chars_8bit_count; i++) {
		ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx8[i]);
		if (ret < 0)
			return -1;
		if (ret == 0) {
			idx8[i] = 0;
			compress_chars = TRUE;
		}
	}
	if (compress_chars) {
		squat_trie_compress_leaf_chars8(node);
		compress_chars = FALSE;
	}
	for (i = 0; i < node->chars_16bit_count; i++) {
		ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx16[i]);
		if (ret < 0)
			return -1;
		if (ret == 0) {
			idx16[i] = 0;
			compress_chars = TRUE;
		}
	}
	if (compress_chars) {
		squat_trie_compress_leaf_chars16(node);
		node->chars_16bit_count = i;
	}
	return 0;
}

static int
squat_trie_compress_node(struct squat_trie_compress_context *ctx,
			 struct trie_node *node, unsigned int level)
{
	struct squat_trie *trie = ctx->trie;
	int ret;

	if (level == BLOCK_SIZE) {
		if (squat_trie_compress_leaf_uidlist(ctx, node))
			return -1;

		if (node->chars_8bit_count == 0 &&
		    node->chars_16bit_count == 0) {
			/* everything expunged */
			ctx->node_count--;
			node->file_offset = 0;
			return 0;
		}
		node_pack_leaf(trie->buf, node);
	} else {
		struct trie_node **children8 = NODE_CHILDREN8(node);
		struct trie_node **children16;

		if ((ret = squat_trie_compress_children(ctx, children8,
							node->chars_8bit_count,
							level + 1)) < 0)
			return -1;
		if (ret == 0)
			squat_trie_compress_chars8(node);

		children16 = NODE_CHILDREN16(node, 0);
		if ((ret = squat_trie_compress_children(ctx, children16,
							node->chars_16bit_count,
							level + 1)) < 0)
			return -1;
		if (ret == 0)
			squat_trie_compress_chars16(node);

		if (node->chars_8bit_count == 0 &&
		    node->chars_16bit_count == 0) {
			/* everything expunged */
			ctx->node_count--;
			node->file_offset = 0;
			return 0;
		}

		node_pack(trie->buf, node);
	}

	if ((ctx->output->offset & 1) != 0)
		o_stream_send(ctx->output, "", 1);
	node->file_offset = ctx->output->offset;

	o_stream_send(ctx->output, trie->buf->data, trie->buf->used);
	return 0;
}

static int squat_trie_compress_init(struct squat_trie_compress_context *ctx,
				    struct squat_trie *trie)
{
	struct squat_trie_header hdr;

	memset(ctx, 0, sizeof(*ctx));

	ctx->tmp_path = t_strconcat(trie->filepath, ".tmp", NULL);
	ctx->fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
	if (ctx->fd == -1) {
		i_error("open(%s, O_CREAT) failed: %m", ctx->tmp_path);
		return -1;
	}

	ctx->trie = trie;
	ctx->output = o_stream_create_fd_file(ctx->fd, 0, FALSE);
	ctx->node_count = trie->hdr->node_count;

	/* write a dummy header first */
	memset(&hdr, 0, sizeof(hdr));
	o_stream_send(ctx->output, &hdr, sizeof(hdr));
	return 0;
}

static void
squat_trie_compress_write_header(struct squat_trie_compress_context *ctx,
				 struct trie_node *root_node)
{
	struct squat_trie_header hdr;

	memset(&hdr, 0, sizeof(hdr));
	hdr.version = SQUAT_TRIE_VERSION;
	hdr.uidvalidity = ctx->trie->uidvalidity;
	hdr.root_offset = root_node->file_offset;
	hdr.used_file_size = ctx->output->offset;
	hdr.node_count = ctx->node_count;

	o_stream_seek(ctx->output, 0);
	o_stream_send(ctx->output, &hdr, sizeof(hdr));
}

int squat_trie_compress(struct squat_trie *trie,
			const ARRAY_TYPE(seq_range) *existing_uids)
{
	struct squat_trie_compress_context ctx;
	struct trie_node *node;
	struct file_lock *file_lock = NULL;
	unsigned int orig_lock_count;
	int ret;

	orig_lock_count = trie->lock_count;
	if (squat_trie_lock(trie, F_WRLCK) <= 0)
		return -1;

	if (squat_trie_compress_init(&ctx, trie) < 0) {
		squat_trie_unlock(trie);
		return -1;
	}

	ret = trie_map_node(trie, trie->hdr->root_offset, 1, &node);
	if (ret == 0) {
		/* do the compression */
		ctx.uidlist_ctx = squat_uidlist_compress_begin(trie->uidlist,
							       existing_uids);
		if ((ret = squat_trie_compress_node(&ctx, node, 1)) < 0)
			squat_uidlist_compress_rollback(&ctx.uidlist_ctx);
		else {
			ret = squat_uidlist_compress_commit(&ctx.uidlist_ctx);

			squat_trie_compress_write_header(&ctx, node);
		}
	}

	if (ret == 0 && orig_lock_count > 0) {
		/* lock the file before renaming so we can keep it locked. */
		if (squat_trie_file_lock(trie, ctx.fd, ctx.tmp_path, F_WRLCK,
					 &file_lock) <= 0)
			ret = -1;
	}

	if (ret == 0) {
		if (rename(ctx.tmp_path, trie->filepath) < 0) {
			i_error("rename(%s, %s) failed: %m",
				ctx.tmp_path, trie->filepath);
			ret = -1;
		}
	}

	o_stream_destroy(&ctx.output);
	squat_trie_unlock(trie);

	if (ret < 0) {
		if (file_lock != NULL)
			file_lock_free(&file_lock);
		(void)close(ctx.fd);
		(void)unlink(ctx.tmp_path);
	} else {
		trie_file_close(trie);
		trie_file_open_fd(trie, ctx.fd);

		trie->file_lock = file_lock;
		if (squat_trie_map(trie) <= 0)
			return -1;
	}
	return ret;
}

int squat_trie_mark_having_expunges(struct squat_trie *trie,
				    const ARRAY_TYPE(seq_range) *existing_uids,
				    unsigned int current_message_count)
{
	bool compress;
	int ret;

	if ((ret = squat_trie_lock(trie, F_RDLCK)) <= 0)
		return ret;
	compress = squat_trie_need_compress(trie, current_message_count);
	squat_trie_unlock(trie);

	ret = squat_uidlist_mark_having_expunges(trie->uidlist, compress);

	if (compress)
		ret = squat_trie_compress(trie, existing_uids);
	return ret;
}

size_t squat_trie_mem_used(struct squat_trie *trie, unsigned int *count_r)
{
	*count_r = trie->hdr == NULL ? 0 : trie->hdr->node_count;

	return trie->mmap_size;
}

static int squat_trie_lookup_init(struct squat_trie *trie, const char *str,
				  const uint16_t **data_r, unsigned int *len_r)
{
	const uint16_t *data;
	unsigned int len = strlen(str);

	if (len < BLOCK_SIZE)
		return -1;

	data = data_normalize(str, len, trie->buf);

	/* skip the blocks that can't exist */
	while (!block_want_add(data + len - BLOCK_SIZE)) {
		if (--len < BLOCK_SIZE)
			return -1;
	}

	if (squat_trie_lock(trie, F_RDLCK) <= 0)
		return -1;

	*data_r = data;
	*len_r = len;
	return 0;
}

static int
squat_trie_lookup_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
			 const uint16_t *data, unsigned int len)
{
	uint32_t list;

	list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1);
	if (list == 0)
		return 0;

	if (squat_uidlist_get(trie->uidlist, list, result) < 0) {
		squat_trie_set_corrupted(trie, "uidlist offset broken");
		return -1;
	}
	while (len > BLOCK_SIZE) {
		len--;

		if (!block_want_add(data + len - BLOCK_SIZE))
			continue;

		list = trie_lookup_node(trie, trie->root,
					data + len - BLOCK_SIZE, 1);
		if (list == 0) {
			array_clear(result);
			return 0;
		}
		if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
			squat_trie_set_corrupted(trie, "uidlist offset broken");
			return -1;
		}
	}
	return array_count(result) > 0 ? 1 : 0;
}

int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
		      const char *str)
{
	const uint16_t *data;
	unsigned int len;
	int ret;

	if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
		return -1;

	ret = squat_trie_lookup_locked(trie, result, data, len);
	squat_trie_unlock(trie);
	return ret;
}

static int
squat_trie_filter_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
			 const uint16_t *data, unsigned int len)
{
	uint32_t list;

	for (; len >= BLOCK_SIZE; len--) {
		if (!block_want_add(data + len - BLOCK_SIZE))
			continue;

		list = trie_lookup_node(trie, trie->root,
					data + len - BLOCK_SIZE, 1);
		if (list == 0) {
			array_clear(result);
			return 0;
		}
		if (squat_uidlist_filter(trie->uidlist, list, result) < 0) {
			squat_trie_set_corrupted(trie, "uidlist offset broken");
			return -1;
		}
	}
	return array_count(result) > 0 ? 1 : 0;
}

int squat_trie_filter(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result,
		      const char *str)
{
	const uint16_t *data;
	unsigned int len;
	int ret;

	if (squat_trie_lookup_init(trie, str, &data, &len) < 0)
		return -1;
	ret = squat_trie_filter_locked(trie, result, data, len);
	squat_trie_unlock(trie);
	return ret;
}

struct squat_uidlist *squat_trie_get_uidlist(struct squat_trie *trie)
{
	return trie->uidlist;
}