changeset 5689:c2362f144f15 HEAD

Initial commit for major index file code cleanup. - The dovecot.index file is no longer required to be updated when syncing. - Getting the latest index file mapping is now always done by reading dovecot.index and then reading the latest changes from dovecot.index.log. - mmap()ing the dovecot.index file is slower than reading it, so it's not currently done unless the file is at 256kB. This may change though. - Some things are still broken.
author Timo Sirainen <tss@iki.fi>
date Mon, 11 Jun 2007 14:50:10 +0300
parents 5a37076852d4
children c1f51c3510ca
files src/lib-index/mail-index-fsck.c src/lib-index/mail-index-lock.c src/lib-index/mail-index-map.c src/lib-index/mail-index-private.h src/lib-index/mail-index-sync-ext.c src/lib-index/mail-index-sync-keywords.c src/lib-index/mail-index-sync-private.h src/lib-index/mail-index-sync-update.c src/lib-index/mail-index-sync.c src/lib-index/mail-index-view-private.h src/lib-index/mail-index-view-sync.c src/lib-index/mail-index-view.c src/lib-index/mail-index.c src/lib-index/mail-index.h src/lib-index/mail-transaction-log-append.c src/lib-index/mail-transaction-log-file.c src/lib-index/mail-transaction-log-private.h src/lib-index/mail-transaction-log-view.c src/lib-index/mail-transaction-log.c src/lib-index/mail-transaction-log.h src/util/idxview.c
diffstat 21 files changed, 1773 insertions(+), 2355 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-index/mail-index-fsck.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-fsck.c	Mon Jun 11 14:50:10 2007 +0300
@@ -23,43 +23,12 @@
 				      map->hdr.field, hdr.field); \
 	}
 
-static void
-mail_index_fsck_locked(struct mail_index *index, struct mail_index_header *hdr)
-{
-	uint32_t log_seq;
-	uoff_t log_offset;
-
-	mail_transaction_log_get_head(index->log, &log_seq, &log_offset);
-
-	if (hdr->log_file_int_offset > hdr->log_file_ext_offset) {
-		mail_index_fsck_error(index,
-			"log_file_int_offset > log_file_ext_offset");
-		hdr->log_file_int_offset = hdr->log_file_ext_offset;
-	}
-
-	if ((hdr->log_file_seq == log_seq &&
-	     hdr->log_file_ext_offset > log_offset) ||
-	    (hdr->log_file_seq != log_seq &&
-	     !mail_transaction_log_is_head_prev(index->log,
-						hdr->log_file_seq,
-						hdr->log_file_ext_offset))) {
-		mail_index_fsck_error(index,
-			"log file sync pos %u,%u -> %u, %"PRIuUOFF_T,
-			hdr->log_file_seq, hdr->log_file_ext_offset,
-			log_seq, log_offset);
-		hdr->log_file_seq = log_seq;
-		hdr->log_file_int_offset =
-			hdr->log_file_ext_offset = log_offset;
-	}
-}
-
 static int
 mail_index_fsck_map(struct mail_index *index, struct mail_index_map *map,
 		    const char **error_r)
 {
 	struct mail_index_header hdr;
 	const struct mail_index_record *rec;
-	unsigned int records_count;
 	uint32_t i, last_uid;
 
 	*error_r = NULL;
@@ -72,13 +41,6 @@
 		return 0;
 	}
 
-	if (!index->log_locked)
-		records_count = map->hdr.messages_count;
-	else {
-		records_count = map->records_count;
-		mail_index_fsck_locked(index, &hdr);
-	}
-
 	hdr.flags &= ~MAIL_INDEX_HDR_FLAG_FSCK;
 
 	hdr.messages_count = 0;
@@ -149,30 +111,14 @@
 {
 	const char *error;
 	unsigned int lock_id;
-	uint32_t file_seq;
-	uoff_t file_offset;
 	int ret;
-	bool lock_log;
-
-	if (index->sync_update) {
-		/* we're modifying index, don't do anything */
-		return 1;
-	}
 
 	i_warning("fscking index file %s", index->filepath);
-        lock_log = !index->log_locked;
-	if (lock_log) {
-		if (mail_transaction_log_sync_lock(index->log, &file_seq,
-						   &file_offset) < 0)
-			return -1;
-	}
-	if (mail_index_lock_exclusive(index, &lock_id) < 0) {
-                mail_transaction_log_sync_unlock(index->log);
-		return -1;
-	}
 
+	// FIXME: should we be fscking a given map instead? anyway we probably
+	// want to rewrite the main index after fsck is finished.
 	error = NULL;
-	ret = mail_index_map(index, TRUE);
+	ret = mail_index_map(index, MAIL_INDEX_SYNC_HANDLER_HEAD, &lock_id);
 	if (ret > 0) {
 		ret = mail_index_fsck_map(index, index->map, &error);
 		if (ret > 0) {
@@ -183,8 +129,6 @@
 	}
 
 	mail_index_unlock(index, lock_id);
-	if (lock_log)
-		mail_transaction_log_sync_unlock(index->log);
 
 	if (error != NULL) {
 		mail_index_set_error(index, "Corrupted index file %s: %s",
--- a/src/lib-index/mail-index-lock.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-lock.c	Mon Jun 11 14:50:10 2007 +0300
@@ -3,34 +3,23 @@
 /*
    Locking should never fail or timeout. Exclusive locks must be kept as short
    time as possible. Shared locks can be long living, so if we can't get
-   exclusive lock directly within 2 seconds, we'll replace the index file with
-   a copy of it. That means the shared lock holders can keep using the old file
-   while we're modifying the new file.
+   exclusive lock directly, we'll recreate the index. That means the shared
+   lock holders can keep using the old file.
 
    lock_id is used to figure out if acquired lock is still valid. When index
    file is reopened, the lock_id can become invalid. It doesn't matter however,
    as no-one's going to modify the old file anymore.
 
-   lock_id also tells if we're referring to shared or exclusive lock. This
-   allows us to drop back to shared locking once all exclusive locks are
-   dropped. Shared locks have even numbers, exclusive locks have odd numbers.
+   lock_id also tells us if we're referring to a shared or an exclusive lock.
+   This allows us to drop back to shared locking once all exclusive locks
+   are dropped. Shared locks have even numbers, exclusive locks have odd numbers.
    The number is increased by two every time the lock is dropped or index file
    is reopened.
 */
 
 #include "lib.h"
-#include "buffer.h"
-#include "mmap-util.h"
-#include "write-full.h"
 #include "mail-index-private.h"
 
-#include <stdio.h>
-#include <sys/stat.h>
-
-#ifdef HAVE_FLOCK
-#  include <sys/file.h>
-#endif
-
 int mail_index_lock_fd(struct mail_index *index, const char *path, int fd,
 		       int lock_type, unsigned int timeout_secs,
 		       struct file_lock **lock_r)
@@ -45,10 +34,9 @@
 }
 
 static int mail_index_lock(struct mail_index *index, int lock_type,
-			   unsigned int timeout_secs, int update_index,
-			   unsigned int *lock_id_r)
+			   unsigned int timeout_secs, unsigned int *lock_id_r)
 {
-	int ret, ret2;
+	int ret;
 
 	i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK);
 
@@ -64,20 +52,6 @@
 		ret = 0;
 	}
 
-	if (update_index && index->excl_lock_count == 0) {
-		/* we wish to have the latest available index file. */
-		i_assert(index->lock_type != F_WRLCK);
-		if ((ret2 = mail_index_reopen_if_needed(index)) < 0)
-			return -1;
-		if (ret > 0 && ret2 == 0) {
-			/* no new file and the old file is already locked */
-			i_assert(lock_type == F_RDLCK);
-			i_assert(index->lock_type == F_RDLCK);
-			return 1;
-		}
-		ret = 0;
-	}
-
 	if (ret > 0) {
 		/* file is already locked */
 		return 1;
@@ -90,10 +64,6 @@
 		   locks then, though */
 		if (lock_type == F_WRLCK)
 			return 0;
-		if (update_index && index->lock_type == F_UNLCK) {
-			if (mail_index_reopen_if_needed(index) < 0)
-				return -1;
-		}
 
 		index->shared_lock_count++;
 		index->lock_type = F_RDLCK;
@@ -147,13 +117,11 @@
 	return 1;
 }
 
-int mail_index_lock_shared(struct mail_index *index, bool update_index,
-			   unsigned int *lock_id_r)
+int mail_index_lock_shared(struct mail_index *index, unsigned int *lock_id_r)
 {
 	int ret;
 
-	ret = mail_index_lock(index, F_RDLCK, MAIL_INDEX_LOCK_SECS,
-			      update_index, lock_id_r);
+	ret = mail_index_lock(index, F_RDLCK, MAIL_INDEX_LOCK_SECS, lock_id_r);
 	if (ret > 0)
 		return 0;
 	if (ret < 0)
@@ -166,235 +134,14 @@
 	return -1;
 }
 
-static int mail_index_copy(struct mail_index *index, const char **path_r)
-{
-	struct mail_index_map *map = index->map;
-	unsigned int base_size;
-	const char *path;
-	int ret, fd;
-
-	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
-
-	fd = mail_index_create_tmp_file(index, &path);
-	if (fd == -1)
-		return -1;
-
-	/* write base header */
-	base_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
-	ret = write_full(fd, &map->hdr, base_size);
-	if (ret == 0) {
-		/* write extended headers */
-		ret = write_full(fd, CONST_PTR_OFFSET(map->hdr_base, base_size),
-				 map->hdr.header_size - base_size);
-	}
-
-	if (ret < 0 || write_full(fd, map->records, map->records_count *
-				  map->hdr.record_size) < 0) {
-		mail_index_file_set_syscall_error(index, path, "write_full()");
-		(void)close(fd);
-		(void)unlink(path);
-		fd = -1;
-	} else {
-		*path_r = path;
-	}
-
-	return fd;
-}
-
-static int mail_index_lock_exclusive_copy(struct mail_index *index)
-{
-	struct mail_index_map *map;
-
-	i_assert(index->log_locked);
-        i_assert(index->excl_lock_count == 0);
-
-	map = mail_index_map_clone(index->map, index->map->hdr.record_size);
-	mail_index_unmap(index, &index->map);
-	index->map = map;
-	index->hdr = &map->hdr;
-
-	map->write_atomic = TRUE;
-	map->write_to_disk = TRUE;
-
-	index->excl_lock_count++;
-	index->lock_type = F_WRLCK;
-	return 0;
-}
-
-int mail_index_lock_exclusive(struct mail_index *index,
-			      unsigned int *lock_id_r)
-{
-	int ret;
-
-	/* exclusive transaction log lock protects exclusive locking
-	   for the main index file */
-	i_assert(index->log_locked);
-
-	/* if header size is smaller than what we have, we'll have to recreate
-	   the index to grow it. so don't even try regular locking. */
-	if (index->map->hdr.base_header_size >= sizeof(*index->hdr) ||
-	    index->excl_lock_count > 0) {
-		/* wait two seconds for exclusive lock */
-		ret = mail_index_lock(index, F_WRLCK, 2, TRUE, lock_id_r);
-		if (ret > 0)
-			return 0;
-		if (ret < 0)
-			return -1;
-	}
-	if (mail_index_lock_exclusive_copy(index) < 0)
-		return -1;
-	*lock_id_r = index->lock_id + 1;
-	return 0;
-}
-
-static int
-mail_index_copy_lock_finish(struct mail_index *index, const char *path)
-{
-	int ret = 0;
-
-	if (!index->fsync_disable) {
-		if (fsync(index->fd) < 0) {
-			mail_index_file_set_syscall_error(index, path,
-							  "fsync()");
-			ret = -1;
-		}
-	}
-
-	if (ret == 0 && rename(path, index->filepath) < 0) {
-		mail_index_set_error(index, "rename(%s, %s) failed: %m",
-				     path, index->filepath);
-		ret = -1;
-	}
-	if (ret < 0) {
-		if (unlink(path) < 0) {
-			mail_index_set_error(index, "unlink(%s) failed: %m",
-					     path);
-		}
-		return -1;
-	}
-	return 0;
-}
-
-static int mail_index_write_map_over(struct mail_index *index)
+int mail_index_try_lock_exclusive(struct mail_index *index,
+				  unsigned int *lock_id_r)
 {
-	struct mail_index_map *map = index->map;
-	unsigned int base_size;
-
-	if (MAIL_INDEX_IS_IN_MEMORY(index))
-		return 0;
-
-	/* write records. */
-	if (map->write_seq_first != 0) {
-		size_t rec_offset =
-			(map->write_seq_first-1) * map->hdr.record_size;
-
-		if (pwrite_full(index->fd,
-				CONST_PTR_OFFSET(map->records, rec_offset),
-				(map->write_seq_last -
-				 map->write_seq_first + 1) *
-				map->hdr.record_size,
-				map->hdr.header_size + rec_offset) < 0)
-			return -1;
-	}
-
-	/* write base header */
-	base_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
-	if (pwrite_full(index->fd, &map->hdr, base_size, 0) < 0)
-		return -1;
-
-	/* write extended headers */
-	if (pwrite_full(index->fd, CONST_PTR_OFFSET(map->hdr_base, base_size),
-			map->hdr.header_size - base_size, base_size) < 0)
-		return -1;
-	return 0;
-}
-
-static int
-mail_index_copy_and_reopen(struct mail_index *index, const char **path_r)
-{
-	const char *path = NULL;
-	int fd;
-
-	fd = mail_index_copy(index, &path);
-	if (fd == -1) {
-		if (!index->nodiskspace)
-			return -1;
-
-		return mail_index_move_to_memory(index);
-	}
-
-	if (mail_index_reopen(index, fd) < 0) {
-		(void)close(fd);
-		return -1;
-	}
-	*path_r = path;
-	return 0;
-}
-
-static void
-mail_index_write_map(struct mail_index *index, const char **path_r)
-{
-	struct mail_index_map *map = index->map;
-
-	if (map->write_atomic || index->fd == -1) {
-		/* write by recreating the index */
-		i_assert(index->log_locked);
-
-		if (!MAIL_INDEX_IS_IN_MEMORY(index)) {
-			if (mail_index_copy_and_reopen(index, path_r) < 0)
-				mail_index_set_inconsistent(index);
-		}
-
-	} else {
-		/* write the modified parts. header is small enough to be
-		   always written, write_seq_* specifies the record range. */
-		if (mail_index_write_map_over(index) < 0) {
-			mail_index_set_error(index,
-				"pwrite_full(%s) failed: %m", index->filepath);
-			mail_index_set_inconsistent(index);
-		}
-	}
-
-	map->write_to_disk = FALSE;
-	map->write_atomic = FALSE;
-	map->write_seq_first = map->write_seq_last = 0;
-}
-
-static bool mail_index_excl_unlock_finish(struct mail_index *index)
-{
-	const char *path = NULL;
-
-	if (index->map != NULL && index->map->write_to_disk)
-		mail_index_write_map(index, &path);
-
-	if (index->shared_lock_count > 0 &&
-	    index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
-		/* leave ourself shared locked. */
-		i_assert(index->lock_type == F_WRLCK);
-		index->lock_type = F_RDLCK;
-
-		if (!MAIL_INDEX_IS_IN_MEMORY(index))
-			(void)file_lock_try_update(index->file_lock, F_RDLCK);
-	}
-
-	if (path != NULL) {
-		i_assert(index->log_locked);
-
-		if (mail_index_copy_lock_finish(index, path) < 0)
-			mail_index_set_inconsistent(index);
-
-		/* We may still have shared locks for the old file, but they
-		   don't matter. They're invalidated when we re-open the new
-		   index file. */
-		return FALSE;
-	}
-	return TRUE;
+	return mail_index_lock(index, F_WRLCK, 0, lock_id_r);
 }
 
 void mail_index_unlock(struct mail_index *index, unsigned int lock_id)
 {
-	bool unlock = TRUE;
-
 	if ((lock_id & 1) == 0) {
 		/* shared lock */
 		if (!mail_index_is_locked(index, lock_id)) {
@@ -410,15 +157,19 @@
 		i_assert(lock_id == index->lock_id + 1);
 		i_assert(index->excl_lock_count > 0);
 		i_assert(index->lock_type == F_WRLCK);
-		if (--index->excl_lock_count == 0)
-			unlock = mail_index_excl_unlock_finish(index);
+		if (--index->excl_lock_count == 0 &&
+		    index->shared_lock_count > 0) {
+			/* drop back to a shared lock. */
+			index->lock_type = F_RDLCK;
+			(void)file_lock_try_update(index->file_lock, F_RDLCK);
+		}
 	}
 
 	if (index->shared_lock_count == 0 && index->excl_lock_count == 0) {
 		index->lock_id += 2;
 		index->lock_type = F_UNLCK;
 		if (index->lock_method != FILE_LOCK_METHOD_DOTLOCK) {
-			if (unlock && !MAIL_INDEX_IS_IN_MEMORY(index))
+			if (!MAIL_INDEX_IS_IN_MEMORY(index))
 				file_unlock(&index->file_lock);
 		}
 		i_assert(index->file_lock == NULL);
@@ -427,7 +178,7 @@
 
 bool mail_index_is_locked(struct mail_index *index, unsigned int lock_id)
 {
-	if ((index->lock_id ^ lock_id) <= 1) {
+	if ((index->lock_id ^ lock_id) <= 1 && lock_id != 0) {
 		i_assert(index->lock_type != F_UNLCK);
 		return TRUE;
 	}
--- a/src/lib-index/mail-index-map.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-map.c	Mon Jun 11 14:50:10 2007 +0300
@@ -133,7 +133,7 @@
 	for (i = 0; i < old_count; i++)
 		array_append(&map->ext_id_map, &ext_id, 1);
 
-	while (offset < map->hdr.header_size) {
+	for (i = 0; offset < map->hdr.header_size; i++) {
 		ext_hdr = CONST_PTR_OFFSET(map->hdr_base, offset);
 
 		/* Extension header contains:
@@ -170,6 +170,15 @@
 			return -1;
 		}
 
+		if ((ext_hdr->record_size == 0 && ext_hdr->hdr_size == 0) ||
+		    ext_hdr->record_align == 0 || *name == '\0') {
+			mail_index_set_error(index, "Corrupted index file %s: "
+					     "Broken header extension %s",
+					     index->filepath, *name == '\0' ?
+					     t_strdup_printf("#%d", i) : name);
+			t_pop();
+			return -1;
+		}
 		if (map->hdr.record_size <
 		    ext_hdr->record_offset + ext_hdr->record_size) {
 			mail_index_set_error(index, "Corrupted index file %s: "
@@ -189,6 +198,7 @@
 			t_pop();
 			return -1;
 		}
+
 		mail_index_map_register_ext(index, map, name,
 					    offset, ext_hdr->hdr_size,
 					    ext_hdr->record_offset,
@@ -202,7 +212,9 @@
 	return 1;
 }
 
-static bool mail_index_check_header_compat(const struct mail_index_header *hdr)
+static bool mail_index_check_header_compat(struct mail_index *index,
+					   const struct mail_index_header *hdr,
+					   uoff_t file_size)
 {
         enum mail_index_header_compat_flags compat_flags = 0;
 
@@ -214,15 +226,34 @@
 		/* major version change - handle silently(?) */
 		return FALSE;
 	}
+	if ((hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
+		/* we've already complained about it */
+		return FALSE;
+	}
+
 	if (hdr->compat_flags != compat_flags) {
-		/* architecture change - handle silently(?) */
+		/* architecture change */
+		mail_index_set_error(index, "Rebuilding index file %s: "
+				     "CPU architecture changed",
+				     index->filepath);
 		return FALSE;
 	}
 
-	if ((hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0) {
-		/* we've already complained about it */
+	if (hdr->base_header_size < MAIL_INDEX_HEADER_MIN_SIZE ||
+	    hdr->header_size < hdr->base_header_size) {
+		mail_index_set_error(index, "Corrupted index file %s: "
+				     "Corrupted header sizes (base %u, full %u)",
+				     index->filepath, hdr->base_header_size,
+				     hdr->header_size);
 		return FALSE;
 	}
+	if (hdr->header_size > file_size) {
+		mail_index_set_error(index, "Corrupted index file %s: "
+				     "Corrupted header size (%u > %"PRIuUOFF_T")",
+				     index->filepath, hdr->header_size,
+				     file_size);
+		return 0;
+	}
 
 	return TRUE;
 }
@@ -232,7 +263,7 @@
 {
 	const struct mail_index_header *hdr = &map->hdr;
 
-	if (!mail_index_check_header_compat(hdr))
+	if (!mail_index_check_header_compat(index, hdr, (uoff_t)-1))
 		return -1;
 
 	/* following some extra checks that only take a bit of CPU */
@@ -276,7 +307,7 @@
 			return 0;
 	}
 
-	return mail_index_parse_extensions(index, map);
+	return 1;
 }
 
 static void mail_index_map_clear(struct mail_index *index,
@@ -302,24 +333,6 @@
 	}
 }
 
-void mail_index_unmap(struct mail_index *index, struct mail_index_map **_map)
-{
-	struct mail_index_map *map = *_map;
-
-	*_map = NULL;
-	if (--map->refcount > 0)
-		return;
-
-	i_assert(map->refcount == 0);
-	mail_index_map_clear(index, map);
-	if (map->extension_pool != NULL)
-		pool_unref(map->extension_pool);
-	if (array_is_created(&map->keyword_idx_map))
-		array_free(&map->keyword_idx_map);
-	buffer_free(map->hdr_copy_buf);
-	i_free(map);
-}
-
 static void mail_index_map_copy_hdr(struct mail_index_map *map,
 				    const struct mail_index_header *hdr)
 {
@@ -333,27 +346,33 @@
 	}
 }
 
-static int mail_index_mmap(struct mail_index *index, struct mail_index_map *map)
+static int mail_index_mmap(struct mail_index *index, struct mail_index_map *map,
+			   uoff_t file_size)
 {
 	const struct mail_index_header *hdr;
 	unsigned int records_count;
 
-	i_assert(!map->write_to_disk);
-
 	if (map->buffer != NULL) {
 		/* we had temporarily used a buffer, eg. for updating index */
 		buffer_free(map->buffer);
 		map->buffer = NULL;
 	}
 
-	map->mmap_base = index->readonly ?
-		mmap_ro_file(index->fd, &map->mmap_size) :
-		mmap_rw_file(index->fd, &map->mmap_size);
+	if (file_size > SSIZE_T_MAX) {
+		/* too large file to map into memory */
+		mail_index_set_error(index, "Index file too large: %s",
+				     index->filepath);
+		return -1;
+	}
+
+	map->mmap_base =
+		mmap(NULL, file_size, PROT_READ, MAP_SHARED, index->fd, 0);
 	if (map->mmap_base == MAP_FAILED) {
 		map->mmap_base = NULL;
 		mail_index_set_syscall_error(index, "mmap()");
 		return -1;
 	}
+	map->mmap_size = file_size;
 
 	hdr = map->mmap_base;
 	if (map->mmap_size >
@@ -370,7 +389,7 @@
 		return 0;
 	}
 
-	if (!mail_index_check_header_compat(hdr)) {
+	if (!mail_index_check_header_compat(index, hdr, map->mmap_size)) {
 		/* Can't use this file */
 		return 0;
 	}
@@ -396,23 +415,47 @@
 	return 1;
 }
 
+static int mail_index_read_header(struct mail_index *index,
+				  void *buf, size_t buf_size, size_t *pos_r)
+{
+	size_t pos;
+	int ret;
+
+	memset(buf, 0, sizeof(struct mail_index_header));
+
+        /* try to read the whole header, but it's not necessarily an error to
+	   read less since the older versions of the index format could be
+	   smaller. Request reading up to buf_size, but accept if we only got
+	   the header. */
+        pos = 0;
+	do {
+		ret = pread(index->fd, PTR_OFFSET(buf, pos),
+			    buf_size - pos, pos);
+		if (ret > 0)
+			pos += ret;
+	} while (ret > 0 && pos < sizeof(struct mail_index_header));
+
+	*pos_r = pos;
+	return ret;
+}
+
 static int
-mail_index_read_map(struct mail_index *index, struct mail_index_map *map,
-		    bool *retry_r, bool try_retry)
+mail_index_try_read_map(struct mail_index *index, struct mail_index_map *map,
+			uoff_t file_size, bool *retry_r, bool try_retry)
 {
 	const struct mail_index_header *hdr;
-	struct stat st;
-	unsigned char buf[512];
+	unsigned char read_buf[4096];
+	const void *buf;
 	void *data = NULL;
 	ssize_t ret;
-	size_t pos, records_size;
-	unsigned int records_count;
+	size_t pos, records_size, initial_buf_pos = 0;
+	unsigned int records_count, extra;
 
 	i_assert(map->mmap_base == NULL);
 
 	*retry_r = FALSE;
-	ret = mail_index_read_header(index, buf, sizeof(buf), &pos);
-	hdr = (const struct mail_index_header *)buf;
+	ret = mail_index_read_header(index, read_buf, sizeof(read_buf), &pos);
+	buf = read_buf; hdr = buf;
 
 	if (pos > (ssize_t)offsetof(struct mail_index_header, major_version) &&
 	    hdr->major_version != MAIL_INDEX_MAJOR_VERSION) {
@@ -420,34 +463,14 @@
 		return 0;
 	}
 
-	if (fstat(index->fd, &st) < 0) {
-		mail_index_set_syscall_error(index, "fstat()");
-		return -1;
-	}
-
 	if (ret >= 0 && pos >= MAIL_INDEX_HEADER_MIN_SIZE &&
 	    (ret > 0 || pos >= hdr->base_header_size)) {
-		if (!mail_index_check_header_compat(hdr)) {
+		if (!mail_index_check_header_compat(index, hdr, file_size)) {
 			/* Can't use this file */
 			return 0;
 		}
 
-		if (hdr->base_header_size < MAIL_INDEX_HEADER_MIN_SIZE ||
-		    hdr->header_size < hdr->base_header_size) {
-			mail_index_set_error(index, "Corrupted index file %s: "
-				"Corrupted header sizes (base %u, full %u)",
-				index->filepath, hdr->base_header_size,
-				hdr->header_size);
-			return 0;
-		}
-		if (hdr->header_size > (uoff_t)st.st_size) {
-			mail_index_set_error(index, "Corrupted index file %s: "
-				"Corrupted header size (%u > %"PRIuUOFF_T")",
-				index->filepath, hdr->header_size,
-				st.st_size);
-			return 0;
-		}
-
+		initial_buf_pos = pos;
 		if (pos > hdr->header_size)
 			pos = hdr->header_size;
 
@@ -469,10 +492,10 @@
 		/* header read, read the records now. */
 		records_size = (size_t)hdr->messages_count * hdr->record_size;
 
-		if ((uoff_t)st.st_size - hdr->header_size < records_size ||
+		if (file_size - hdr->header_size < records_size ||
 		    (hdr->record_size != 0 &&
 		     records_size / hdr->record_size != hdr->messages_count)) {
-			records_count = (st.st_size - hdr->header_size) /
+			records_count = (file_size - hdr->header_size) /
 				hdr->record_size;
 			mail_index_set_error(index, "Corrupted index file %s: "
 				"messages_count too large (%u > %u)",
@@ -488,10 +511,20 @@
 
 		/* @UNSAFE */
 		buffer_set_used_size(map->buffer, 0);
-		data = buffer_append_space_unsafe(map->buffer, records_size);
-
-		ret = pread_full(index->fd, data, records_size,
-				 hdr->header_size);
+		if (initial_buf_pos <= hdr->header_size)
+			extra = 0;
+		else {
+			extra = initial_buf_pos - hdr->header_size;
+			buffer_append(map->buffer,
+				      CONST_PTR_OFFSET(buf, hdr->header_size),
+				      extra);
+		}
+		if (records_size > extra) {
+			data = buffer_append_space_unsafe(map->buffer,
+							  records_size - extra);
+			ret = pread_full(index->fd, data, records_size - extra,
+					 hdr->header_size + extra);
+		}
 	}
 
 	if (ret < 0) {
@@ -510,161 +543,23 @@
 		return 0;
 	}
 
-	map->records = data;
+	map->records = buffer_get_modifiable_data(map->buffer, NULL);
 	map->records_count = hdr->messages_count;
 
 	mail_index_map_copy_hdr(map, hdr);
 	map->hdr_base = map->hdr_copy_buf->data;
-
-	index->sync_log_file_seq = hdr->log_file_seq;
-	index->sync_log_file_offset = hdr->log_file_int_offset;
 	return 1;
 }
 
-static int mail_index_sync_from_transactions(struct mail_index *index,
-					     struct mail_index_map **map,
-					     bool sync_to_index)
-{
-	const struct mail_index_header *map_hdr = &(*map)->hdr;
-	struct mail_index_view *view;
-	struct mail_index_sync_map_ctx sync_map_ctx;
-	struct mail_index_header hdr;
-	const struct mail_transaction_header *thdr;
-	const void *tdata;
-	uint32_t prev_seq, max_seq;
-	uoff_t prev_offset, max_offset;
-	size_t pos;
-	int ret;
-	bool skipped, check_ext_offsets, broken;
-
-	if (sync_to_index) {
-		/* read the real log position where we are supposed to be
-		   synced */
-		ret = mail_index_read_header(index, &hdr, sizeof(hdr), &pos);
-		if (ret < 0 && errno != ESTALE) {
-			mail_index_set_syscall_error(index, "pread()");
-			return -1;
-		}
-		if (pos < MAIL_INDEX_HEADER_MIN_SIZE)
-			return 0;
-
-		if (map_hdr->log_file_seq == hdr.log_file_seq &&
-		    map_hdr->log_file_int_offset == hdr.log_file_int_offset) {
-			/* nothing to do */
-			return 1;
-		}
-
-		if (map_hdr->log_file_seq > hdr.log_file_seq ||
-		    (map_hdr->log_file_seq == hdr.log_file_seq &&
-		     map_hdr->log_file_int_offset > hdr.log_file_int_offset)) {
-			/* we went too far, have to re-read the file */
-			return 0;
-		}
-		if (map_hdr->log_file_ext_offset !=
-		    map_hdr->log_file_int_offset ||
-		    hdr.log_file_ext_offset != hdr.log_file_int_offset) {
-			/* too much trouble to get this right. */
-			return 0;
-		}
-		max_seq = hdr.log_file_seq;
-		max_offset = hdr.log_file_int_offset;
-	} else {
-		/* sync everything there is */
-		max_seq = (uint32_t)-1;
-		max_offset = (uoff_t)-1;
-	}
-
-	index->map = *map;
-
-	view = mail_index_view_open(index);
-	if (mail_transaction_log_view_set(view->log_view,
-					  map_hdr->log_file_seq,
-					  map_hdr->log_file_int_offset,
-					  max_seq, max_offset,
-					  MAIL_TRANSACTION_TYPE_MASK) <= 0) {
-		/* can't use it. sync by re-reading index. */
-		mail_index_view_close(&view);
-		index->map = NULL;
-		return 0;
-	}
-
-	mail_index_sync_map_init(&sync_map_ctx, view,
-				 MAIL_INDEX_SYNC_HANDLER_HEAD);
-
-	check_ext_offsets = TRUE; broken = FALSE;
-	while ((ret = mail_transaction_log_view_next(view->log_view, &thdr,
-						     &tdata, &skipped)) > 0) {
-		if ((thdr->type & MAIL_TRANSACTION_EXTERNAL) != 0 &&
-		    check_ext_offsets) {
-			if (mail_index_is_ext_synced(view->log_view,
-						     index->map))
-				continue;
-			check_ext_offsets = FALSE;
-		}
-
-		if (mail_index_sync_record(&sync_map_ctx, thdr, tdata) < 0) {
-			ret = 0;
-			broken = TRUE;
-			break;
-		}
-	}
-	if (ret == 0 && !broken)
-		ret = 1;
-
-	mail_transaction_log_view_get_prev_pos(view->log_view, &prev_seq,
-					       &prev_offset);
-	i_assert(prev_seq <= max_seq &&
-		 (prev_seq != max_seq || prev_offset <= max_offset));
-
-	index->map->hdr.log_file_seq = prev_seq;
-	index->map->hdr.log_file_int_offset =
-		index->map->hdr.log_file_ext_offset = prev_offset;
-
-	mail_index_sync_map_deinit(&sync_map_ctx);
-	mail_index_view_close(&view);
-
-	*map = index->map;
-	index->map = NULL;
-
-	if (sync_to_index && ret > 0) {
-		/* make sure we did everything right. note that although the
-		   message counts should be equal, the flag counters may not */
-		i_assert(hdr.messages_count == (*map)->hdr.messages_count);
-		i_assert(hdr.log_file_seq == (*map)->hdr.log_file_seq);
-		i_assert(hdr.log_file_int_offset == (*map)->hdr.log_file_int_offset);
-		i_assert(hdr.log_file_ext_offset == (*map)->hdr.log_file_ext_offset);
-	}
-
-	return ret;
-}
-
-static int mail_index_read_map_with_retry(struct mail_index *index,
-					  struct mail_index_map **map,
-					  bool sync_to_index)
+static int
+mail_index_read_map(struct mail_index *index, struct mail_index_map *map,
+		    uoff_t file_size)
 {
 	mail_index_sync_lost_handler_t *const *handlers;
+	struct stat st;
 	unsigned int i, count;
 	int ret;
-	bool retry;
-
-	if (index->log_locked) {
-		/* we're most likely syncing the index and we really don't
-		   want to read more than what was synced last time. */
-		sync_to_index = TRUE;
-	}
-
-	if ((*map)->hdr.indexid != 0 && index->log != NULL) {
-		/* we're not creating the index, or opening transaction log.
-		   sync this as a view from transaction log. */
-		index->sync_update = TRUE;
-		ret = mail_index_sync_from_transactions(index, map,
-							sync_to_index);
-		index->sync_update = FALSE;
-		if (ret != 0)
-			return ret;
-
-		/* transaction log lost/broken, fallback to re-reading it */
-	}
+	bool try_retry, retry;
 
 	/* notify all "sync lost" handlers */
 	handlers = array_get(&index->sync_lost_handlers, &count);
@@ -672,10 +567,17 @@
 		(*handlers[i])(index);
 
 	for (i = 0;; i++) {
-		ret = mail_index_read_map(index, *map, &retry,
-					  i < MAIL_INDEX_ESTALE_RETRY_COUNT);
+		try_retry = i < MAIL_INDEX_ESTALE_RETRY_COUNT;
+		if (file_size == (uoff_t)-1) {
+			/* fstat() below failed */
+			ret = 0;
+			retry = try_retry;
+		} else {
+			ret = mail_index_try_read_map(index, map, file_size,
+						      &retry, try_retry);
+		}
 		if (ret != 0 || !retry)
-			return ret;
+			break;
 
 		/* ESTALE - reopen index file */
                 if (close(index->fd) < 0)
@@ -691,193 +593,281 @@
 			}
 			return -1;
 		}
+		if (fstat(index->fd, &st) == 0)
+			file_size = st.st_size;
+		else {
+			if (errno != ESTALE) {
+				mail_index_set_syscall_error(index, "fstat()");
+				return -1;
+			}
+			file_size = (uoff_t)-1;
+		}
 	}
+	return ret;
+}
+
+static void mail_index_header_init(struct mail_index *index,
+				   struct mail_index_header *hdr)
+{
+	i_assert(index->indexid != 0);
+	i_assert((sizeof(*hdr) % sizeof(uint64_t)) == 0);
+
+	memset(hdr, 0, sizeof(*hdr));
+
+	hdr->major_version = MAIL_INDEX_MAJOR_VERSION;
+	hdr->minor_version = MAIL_INDEX_MINOR_VERSION;
+	hdr->base_header_size = sizeof(*hdr);
+	hdr->header_size = sizeof(*hdr);
+	hdr->record_size = sizeof(struct mail_index_record);
+
+#ifndef WORDS_BIGENDIAN
+	hdr->compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
+#endif
+
+	hdr->indexid = index->indexid;
+	hdr->log_file_seq = 1;
+	hdr->next_uid = 1;
 }
 
-static int mail_index_map_try_existing(struct mail_index *index)
+struct mail_index_map *mail_index_map_alloc(struct mail_index *index)
 {
-	struct mail_index_map *map = index->map;
-	const struct mail_index_header *hdr;
-	size_t used_size;
+	struct mail_index_map tmp_map;
+
+	memset(&tmp_map, 0, sizeof(tmp_map));
+	mail_index_header_init(index, &tmp_map.hdr);
+	tmp_map.hdr_base = &tmp_map.hdr;
+
+	/* a bit kludgy way to do this, but it initializes everything
+	   nicely and correctly */
+	return mail_index_map_clone(&tmp_map);
+}
+
+static int mail_index_map_latest_file(struct mail_index *index,
+				      struct mail_index_map **map,
+				      unsigned int *lock_id_r)
+{
+	struct mail_index_map *new_map;
+	struct stat st;
+	uoff_t file_size;
+	bool use_mmap;
 	int ret;
 
-	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
-		return 0;
+	ret = mail_index_reopen_if_changed(index);
+	if (ret <= 0) {
+		if (ret < 0)
+			return -1;
 
-	hdr = map->mmap_base;
+		/* the index file is lost/broken. let's hope that we can
+		   build it from the transaction log. */
+		return 0;
+	}
 
-	/* always check corrupted-flag to avoid errors later */
-	if ((hdr->flags & MAIL_INDEX_HDR_FLAG_CORRUPTED) != 0)
+	/* the index file is still open, lock it */
+	if (mail_index_lock_shared(index, lock_id_r) < 0)
 		return -1;
 
-	used_size = hdr->header_size + hdr->messages_count * hdr->record_size;
-	if (map->mmap_size >= used_size && map->hdr_base == hdr) {
-		map->records_count = hdr->messages_count;
-		mail_index_map_copy_hdr(map, hdr);
+	if (fstat(index->fd, &st) == 0)
+		file_size = st.st_size;
+	else {
+		if (errno != ESTALE) {
+			mail_index_set_syscall_error(index, "fstat()");
+			return -1;
+		}
+		file_size = (uoff_t)-1;
+	}
+
+	/* mmaping seems to be slower than just reading the file, so even if
+	   mmap isn't disabled don't use it unless the file is large enough */
+	use_mmap = !index->mmap_disable && file_size != (uoff_t)-1 &&
+		file_size > MAIL_INDEX_MMAP_MIN_SIZE;
 
-		/* make sure the header is still valid. it also re-parses
-		   extensions although they shouldn't change without the whole
-		   index being recreated */
-		ret = mail_index_check_header(index, map);
-		if (ret > 0)
-			return 1;
-		/* broken. fallback to re-mmaping which will catch it */
+	new_map = mail_index_map_alloc(index);
+	ret = use_mmap ? mail_index_mmap(index, new_map, file_size) :
+		mail_index_read_map(index, new_map, file_size);
+	if (ret > 0) {
+		/* make sure the header is ok before using this mapping */
+		ret = mail_index_check_header(index, new_map);
+		if (ret >= 0)
+			ret = mail_index_parse_extensions(index, new_map);
+		if (ret++ == 0)
+			index->fsck = TRUE;
 	}
-	return 0;
+	if (ret <= 0) {
+		mail_index_unmap(index, &new_map);
+		return ret;
+	}
+
+	index->last_read_log_file_index_offset =
+		new_map->hdr.log_file_index_int_offset;
+	mail_index_unmap(index, map);
+	*map = new_map;
+	return 1;
 }
 
-int mail_index_map(struct mail_index *index, bool force)
+static int
+mail_index_map_update(struct mail_index *index, struct mail_index_map **_map,
+		      enum mail_index_sync_handler_type type,
+		      unsigned int *lock_id_r)
 {
-	struct mail_index_map *map;
+	struct mail_index_map *map = *_map;
+	unsigned int lock_id = 0;
 	int ret;
 
 	i_assert(!index->mapping);
-	i_assert(index->map == NULL || index->map->refcount > 0);
-	i_assert(index->lock_type != F_UNLCK);
+	i_assert(map->refcount > 0);
 
-	if (MAIL_INDEX_IS_IN_MEMORY(index)) {
-		if (index->map == NULL)
-			mail_index_create_in_memory(index, NULL);
-		return 1;
-	}
-
+	*lock_id_r = 0;
 	index->mapping = TRUE;
 
-	if (!force && index->map != NULL) {
-		i_assert(index->hdr != NULL);
-		ret = mail_index_map_try_existing(index);
-		if (ret != 0) {
-			index->mapping = FALSE;
-			return ret;
-		}
-
-		if (index->lock_type == F_WRLCK) {
-			/* we're syncing, don't break the mapping */
-			index->mapping = FALSE;
-			return 1;
-		}
+	/* first try updating the existing mapping from transaction log. */
+	if (map->hdr.indexid != 0) {
+		/* we're not creating the index, or opening transaction log.
+		   sync this as a view from transaction log. */
+		ret = mail_index_sync_map(index, &map, type, FALSE);
+	} else {
+		ret = 0;
 	}
 
-	if (index->map != NULL && index->map->refcount > 1) {
-		/* this map is already used by some views and they may have
-		   pointers into it. leave them and create a new mapping. */
-		if (!index->mmap_disable) {
-			map = NULL;
-		} else {
-			/* create a copy of the mapping instead so we don't
-			   have to re-read it */
-			map = mail_index_map_clone(index->map,
-						   index->map->hdr.record_size);
-		}
-		index->map->refcount--;
-		index->map = NULL;
-	} else {
-		map = index->map;
+	if (ret == 0) {
+		/* try to open and read the latest index. if it fails for
+		   any reason, we'll fallback to updating the existing mapping
+		   from transaction logs (which we'll also do even if the
+		   reopening succeeds) */
+		(void)mail_index_map_latest_file(index, &map, &lock_id);
+
+		/* and update the map with the latest changes from
+		   transaction log */
+		ret = mail_index_sync_map(index, &map, type, TRUE);
+
+		/* we need the lock only if we didn't move the map to memory */
+		if (!MAIL_INDEX_MAP_IS_IN_MEMORY(map))
+			*lock_id_r = lock_id;
+		else
+			mail_index_unlock(index, lock_id);
 	}
 
-	if (map == NULL) {
-		map = i_new(struct mail_index_map, 1);
-		map->refcount = 1;
-		map->hdr_copy_buf =
-			buffer_create_dynamic(default_pool, sizeof(map->hdr));
-	} else if (MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
-		i_assert(!map->write_to_disk);
-	} else if (map->mmap_base != NULL) {
-		i_assert(map->buffer == NULL);
-		if (munmap(map->mmap_base, map->mmap_size) < 0)
-			mail_index_set_syscall_error(index, "munmap()");
-		map->mmap_base = NULL;
+	if (ret <= 0) {
+		/* broken index */
+		mail_index_map_clear(index, map);
+		mail_index_unmap(index, &map);
 	}
 
+	*_map = map;
+	index->mapping = FALSE;
+	return ret;
+}
+
+int mail_index_map(struct mail_index *index,
+		   enum mail_index_sync_handler_type type,
+		   unsigned int *lock_id_r)
+{
+	struct mail_index_map *map = index->map;
+	int ret;
+
+	i_assert(index->lock_type != F_WRLCK);
+
+	if (map == NULL)
+		map = mail_index_map_alloc(index);
+
 	index->hdr = NULL;
 	index->map = NULL;
 
-	if (!index->mmap_disable)
-		ret = mail_index_mmap(index, map);
-	else
-		ret = mail_index_read_map_with_retry(index, &map, force);
+	ret = mail_index_map_update(index, &map, type, lock_id_r);
 	i_assert(index->map == NULL);
 
 	if (ret > 0) {
-		ret = mail_index_check_header(index, map);
-		if (ret < 0)
-			ret = 0;
-		else if (ret == 0) {
-			index->fsck = TRUE;
-			ret = 1;
+		i_assert(map->hdr.messages_count == map->records_count);
+		index->hdr = &map->hdr;
+		index->map = map;
+	} else {
+		if (map != NULL)
+			mail_index_unmap(index, &map);
+	}
+	return ret;
+}
+
+void mail_index_unmap(struct mail_index *index, struct mail_index_map **_map)
+{
+	struct mail_index_map *map = *_map;
+
+	*_map = NULL;
+	if (--map->refcount > 0)
+		return;
+
+	i_assert(map->refcount == 0);
+	mail_index_map_clear(index, map);
+	if (map->extension_pool != NULL)
+		pool_unref(map->extension_pool);
+	if (array_is_created(&map->keyword_idx_map))
+		array_free(&map->keyword_idx_map);
+	buffer_free(map->hdr_copy_buf);
+	i_free(map);
+}
+
+static void mail_index_map_copy(struct mail_index_map *dest,
+				const struct mail_index_map *src)
+{
+	const struct mail_index_header *src_hdr;
+	size_t size;
+
+	src_hdr = src->mmap_base != NULL ? src->mmap_base : src->hdr_base;
+
+	/* copy records into a newly created in-memory buffer */
+	size = src->records_count * src->hdr.record_size;
+	dest->buffer = buffer_create_dynamic(default_pool, size);
+	buffer_append(dest->buffer, src->records, size);
+
+	dest->records = buffer_get_modifiable_data(dest->buffer, NULL);
+	dest->records_count = src->records_count;
+
+	if (src->mmap_base == NULL)
+		dest->hdr = src->hdr;
+	else {
+		/* refresh the header from the mmap()ed base header */
+		memcpy(&dest->hdr, src_hdr, src->hdr.base_header_size);
+		/* pad short base header; grow header_size by the same amount */
+		if (dest->hdr.base_header_size < sizeof(dest->hdr)) {
+			dest->hdr.header_size +=
+				sizeof(dest->hdr) - dest->hdr.base_header_size;
+			dest->hdr.base_header_size = sizeof(dest->hdr);
 		}
 	}
 
-	if (ret <= 0) {
-		mail_index_map_clear(index, map);
-		mail_index_unmap(index, &map);
-		index->mapping = FALSE;
-		return ret;
+	/* rebuild hdr_copy_buf: full base header, then extension headers */
+	if (dest->hdr_copy_buf != NULL)
+		buffer_set_used_size(dest->hdr_copy_buf, 0);
+	else {
+		dest->hdr_copy_buf =
+			buffer_create_dynamic(default_pool,
+					      dest->hdr.header_size);
 	}
-
-	index->hdr = &map->hdr;
-	index->map = map;
-	i_assert(map->hdr.messages_count == map->records_count);
-	index->mapping = FALSE;
-	return 1;
+	buffer_append(dest->hdr_copy_buf, &dest->hdr, sizeof(dest->hdr));
+	buffer_append(dest->hdr_copy_buf,
+		      CONST_PTR_OFFSET(src_hdr, src_hdr->base_header_size),
+		      src_hdr->header_size - src_hdr->base_header_size);
+	dest->hdr_base = buffer_get_modifiable_data(dest->hdr_copy_buf, NULL);
 }
 
-struct mail_index_map *
-mail_index_map_clone(const struct mail_index_map *map, uint32_t new_record_size)
+struct mail_index_map *mail_index_map_clone(const struct mail_index_map *map)
 {
 	struct mail_index_map *mem_map;
-	struct mail_index_header *hdr;
 	struct mail_index_ext *extensions;
-	void *src, *dest;
-	size_t size, copy_size;
 	unsigned int i, count;
 
-        size = map->records_count * new_record_size;
-
 	mem_map = i_new(struct mail_index_map, 1);
 	mem_map->refcount = 1;
-	mem_map->buffer = buffer_create_dynamic(default_pool, size);
-	if (map->hdr.record_size == new_record_size)
-		buffer_append(mem_map->buffer, map->records, size);
-	else {
-		copy_size = I_MIN(map->hdr.record_size, new_record_size);
-		src = map->records;
-		for (i = 0; i < map->records_count; i++) {
-			dest = buffer_append_space_unsafe(mem_map->buffer,
-							  new_record_size);
-			memcpy(dest, src, copy_size);
-			src = PTR_OFFSET(src, map->hdr.record_size);
-		}
-	}
 
-	mem_map->records = buffer_get_modifiable_data(mem_map->buffer, NULL);
-	mem_map->records_count = map->records_count;
+	mail_index_map_copy(mem_map, map);
 
-	mem_map->hdr_copy_buf =
-		buffer_create_dynamic(default_pool, map->hdr.header_size);
-	if (map->hdr.base_header_size < sizeof(*hdr))
-		buffer_append_zero(mem_map->hdr_copy_buf, sizeof(*hdr));
-	buffer_write(mem_map->hdr_copy_buf, 0,
-		     &map->hdr, map->hdr.base_header_size);
-	buffer_append(mem_map->hdr_copy_buf,
-		      CONST_PTR_OFFSET(map->hdr_base,
-				       map->hdr.base_header_size),
-		      map->hdr.header_size - map->hdr.base_header_size);
-
-	hdr = buffer_get_modifiable_data(mem_map->hdr_copy_buf, NULL);
-	if (hdr->base_header_size < sizeof(*hdr))
-		hdr->base_header_size = sizeof(*hdr);
-	hdr->record_size = new_record_size;
-	mem_map->hdr = *hdr;
-	mem_map->hdr_base = hdr;
-
-	/* if we're syncing transaction log into memory and later use the
-	   mapping for updating the index, we need to remember what has
-	   changed */
-	mem_map->write_atomic = map->write_atomic;
-	if (map->write_to_disk) {
+	/* if the map is ever written back to disk, we need to keep track of
+	   what has changed. */
+	if (map->write_atomic)
+		mem_map->write_atomic = TRUE;
+	else {
 		mem_map->write_seq_first = map->write_seq_first;
 		mem_map->write_seq_last = map->write_seq_last;
+		mem_map->write_base_header = map->write_base_header;
+		mem_map->write_ext_header = map->write_ext_header;
 	}
 
 	/* copy extensions */
@@ -892,7 +882,8 @@
 		extensions = array_get_modifiable(&mem_map->extensions, &count);
 		for (i = 0; i < count; i++) {
 			i_assert(extensions[i].record_offset +
-				 extensions[i].record_size <= hdr->record_size);
+				 extensions[i].record_size <=
+				 mem_map->hdr.record_size);
 			extensions[i].name = p_strdup(mem_map->extension_pool,
 						      extensions[i].name);
 		}
@@ -901,6 +892,18 @@
 	return mem_map;
 }
 
+void mail_index_map_move_to_memory(struct mail_index_map *map)
+{
+	if (map->mmap_base == NULL)
+		return;
+
+	mail_index_map_copy(map, map);
+
+	if (munmap(map->mmap_base, map->mmap_size) < 0)
+		i_error("munmap(index map) failed: %m");
+	map->mmap_base = NULL;
+}
+
 int mail_index_map_get_ext_idx(struct mail_index_map *map,
 			       uint32_t ext_id, uint32_t *idx_r)
 {
--- a/src/lib-index/mail-index-private.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-private.h	Mon Jun 11 14:50:10 2007 +0300
@@ -10,11 +10,10 @@
 struct mail_transaction_log_view;
 struct mail_index_sync_map_ctx;
 
+/* Index files this size or smaller are read to memory instead of mmap()ed. */
+#define MAIL_INDEX_MMAP_MIN_SIZE (1024*256)
 /* How many seconds to wait a lock for index file. */
 #define MAIL_INDEX_LOCK_SECS 120
-/* Index file is grown exponentially when we're adding less than this many
-   records. */
-#define MAIL_INDEX_MAX_POWER_GROW (1024*1024 / sizeof(struct mail_index_record))
 /* How many times to retry opening index files if read/fstat returns ESTALE.
    This happens with NFS when the file has been deleted (ie. index file was
    rewritten by another computer than us). */
@@ -121,14 +120,15 @@
 
 	ARRAY_DEFINE(keyword_idx_map, unsigned int); /* file -> index */
 
-	/* If write_to_disk=TRUE and write_atomic=FALSE, these sequences
-	   specify the range that needs to be written. Header should always
-	   be rewritten. */
+	/* If this mapping is written to disk and write_atomic=FALSE,
+	   write_seq_* specify the message sequence range that needs to be
+	   written. */
 	uint32_t write_seq_first, write_seq_last;
 
 	unsigned int keywords_read:1;
-	unsigned int write_to_disk:1;
-	unsigned int write_atomic:1; /* copy to new file and rename() */
+	unsigned int write_base_header:1;
+	unsigned int write_ext_header:1;
+	unsigned int write_atomic:1; /* write to a new file and rename() */
 };
 
 struct mail_index_module_register {
@@ -156,9 +156,10 @@
 	char *filepath;
 	int fd;
 
-        struct mail_index_map *map;
 	const struct mail_index_header *hdr;
+	struct mail_index_map *map;
 	uint32_t indexid;
+	uint32_t last_read_log_file_index_offset;
 
 	int lock_type, shared_lock_count, excl_lock_count;
 	unsigned int lock_id;
@@ -167,17 +168,11 @@
 	struct file_lock *file_lock;
 	struct dotlock *dotlock;
 
-	/* These are typically same as map->hdr->log_file_*, but with
-	   mmap_disable we may have synced more than index */
-	uint32_t sync_log_file_seq;
-	uoff_t sync_log_file_offset;
-
 	pool_t keywords_pool;
 	ARRAY_TYPE(keywords) keywords;
 	struct hash_table *keywords_hash; /* name -> idx */
 
 	uint32_t keywords_ext_id;
-	unsigned int last_grow_count;
 
 	/* Module-specific contexts. */
 	ARRAY_DEFINE(module_contexts, union mail_index_module_context *);
@@ -193,7 +188,6 @@
 	unsigned int use_excl_dotlocks:1;
 	unsigned int readonly:1;
 	unsigned int fsck:1;
-	unsigned int sync_update:1;
 	unsigned int mapping:1;
 };
 
@@ -216,25 +210,19 @@
 void mail_index_unregister_sync_lost_handler(struct mail_index *index,
 					mail_index_sync_lost_handler_t *cb);
 
-int mail_index_read_header(struct mail_index *index,
-			   void *buf, size_t buf_size, size_t *pos_r);
 int mail_index_write_base_header(struct mail_index *index,
 				 const struct mail_index_header *hdr);
 
-int mail_index_try_open_only(struct mail_index *index);
-int mail_index_reopen(struct mail_index *index, int fd);
-void mail_index_create_in_memory(struct mail_index *index,
-				 const struct mail_index_header *hdr);
 int mail_index_create_tmp_file(struct mail_index *index, const char **path_r);
 
-/* Returns 0 = ok, -1 = error. If update_index is TRUE, reopens the index
-   file if needed to get later version of it (not necessarily latest due to
-   races, unless transaction log is exclusively locked). */
-int mail_index_lock_shared(struct mail_index *index, bool update_index,
-			   unsigned int *lock_id_r);
+int mail_index_try_open_only(struct mail_index *index);
+int mail_index_reopen_if_changed(struct mail_index *index);
+
 /* Returns 0 = ok, -1 = error. */
-int mail_index_lock_exclusive(struct mail_index *index,
-			      unsigned int *lock_id_r);
+int mail_index_lock_shared(struct mail_index *index, unsigned int *lock_id_r);
+/* Returns 1 = ok, 0 = already locked, -1 = error. */
+int mail_index_try_lock_exclusive(struct mail_index *index,
+				  unsigned int *lock_id_r);
 void mail_index_unlock(struct mail_index *index, unsigned int lock_id);
 /* Returns TRUE if given lock_id is valid. */
 bool mail_index_is_locked(struct mail_index *index, unsigned int lock_id);
@@ -243,24 +231,29 @@
 		       int lock_type, unsigned int timeout_secs,
 		       struct file_lock **lock_r);
 
-/* Reopen index file if it has changed. */
-int mail_index_reopen_if_needed(struct mail_index *index);
+/* Allocate a new empty map. */
+struct mail_index_map *mail_index_map_alloc(struct mail_index *index);
+/* Replace index->map with the latest index changes. This may reopen the index
+   file and/or it may read the latest changes from transaction log. The log is
+   read up to EOF, but non-synced expunges are skipped.
+
+   If mapping required reading the index file, it's shared locked and lock_id
+   is returned. Otherwise returned lock_id is 0.
 
-/* Map index file to memory, replacing the previous mapping for index.
-   Returns 1 = ok, 0 = corrupted, -1 = error. If index needs fscking, it
-   returns 1 but sets index->fsck = TRUE. */
-int mail_index_map(struct mail_index *index, bool force);
-/* Read the latest available header. Normally this is pretty much the same as
-   calling mail_index_map(), but with mmap_disable the header can be generated
-   by reading just log files, so eg. log_file_*_offset values can be wrong.
-   Returns 1 = ok, 0 = EOF, -1 = error. */
-int mail_index_get_latest_header(struct mail_index *index,
-				 struct mail_index_header *hdr_r);
+   Returns 1 = ok, 0 = corrupted, -1 = error. If non-fatal problems were found,
+   1 is returned but index->fsck=TRUE is set. */
+int mail_index_map(struct mail_index *index,
+		   enum mail_index_sync_handler_type type,
+		   unsigned int *lock_id_r);
+/* Return the latest index file's header. This should be used only when you
+   don't want to see later changes from transaction log.
+   Returns 1 = ok, 0 = corrupted, -1 = error. */
+int mail_index_get_last_written_header(struct mail_index *index,
+				       struct mail_index_header *hdr_r);
 /* Unreference given mapping and unmap it if it's dropped to zero. */
 void mail_index_unmap(struct mail_index *index, struct mail_index_map **map);
-struct mail_index_map *
-mail_index_map_clone(const struct mail_index_map *map,
-		     uint32_t new_record_size);
+struct mail_index_map *mail_index_map_clone(const struct mail_index_map *map);
+void mail_index_map_move_to_memory(struct mail_index_map *map);
 
 uint32_t mail_index_map_lookup_ext(struct mail_index_map *map,
 				   const char *name);
@@ -279,11 +272,6 @@
 int mail_index_map_parse_keywords(struct mail_index *index,
                                   struct mail_index_map *map);
 
-int mail_index_fix_header(struct mail_index *index, struct mail_index_map *map,
-			  struct mail_index_header *hdr, const char **error_r);
-bool mail_index_is_ext_synced(struct mail_transaction_log_view *log_view,
-			      struct mail_index_map *map);
-
 void mail_index_view_transaction_ref(struct mail_index_view *view);
 void mail_index_view_transaction_unref(struct mail_index_view *view);
 
--- a/src/lib-index/mail-index-sync-ext.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-sync-ext.c	Mon Jun 11 14:50:10 2007 +0300
@@ -145,31 +145,42 @@
 	return (int)(*e2)->record_align - (int)(*e1)->record_align;
 }
 
-static struct mail_index_map *
-sync_ext_reorder(struct mail_index_map *map, uint32_t ext_id, uint16_t old_size)
+static void sync_ext_reorder(struct mail_index_map *map, uint32_t ext_id,
+			     uint16_t old_ext_size)
 {
-	struct mail_index_map *new_map;
 	struct mail_index_ext *ext, **sorted;
 	struct mail_index_ext_header *ext_hdr;
-	uint16_t *old_offsets, min_align, max_align;
-	uint32_t offset, old_records_count, rec_idx;
+	uint16_t *old_offsets, *copy_sizes, min_align, max_align;
+	uint32_t offset, new_record_size, rec_idx;
 	unsigned int i, count;
 	const void *src;
+	buffer_t *new_buffer;
+	size_t new_buffer_size;
+
+	i_assert(MAIL_INDEX_MAP_IS_IN_MEMORY(map) && map->refcount == 1);
 
 	t_push();
 	ext = array_get_modifiable(&map->extensions, &count);
 
 	/* @UNSAFE */
 	old_offsets = t_new(uint16_t, count);
+	copy_sizes = t_new(uint16_t, count);
 	sorted = t_new(struct mail_index_ext *, count);
 	for (i = 0; i < count; i++) {
 		old_offsets[i] = ext[i].record_offset;
+		copy_sizes[i] = ext[i].record_size;
 		ext[i].record_offset = 0;
 		sorted[i] = &ext[i];
 	}
 	qsort(sorted, count, sizeof(struct mail_index_ext *),
 	      mail_index_ext_align_cmp);
 
+	if (copy_sizes[ext_id] > old_ext_size) {
+		/* we are growing the extension record. remember this
+		   so we don't write extra data while copying the record */
+		copy_sizes[ext_id] = old_ext_size;
+	}
+
 	/* we simply try to use the extensions with largest alignment
 	   requirement first. FIXME: if the extension sizes don't match
 	   alignmentation, this may not give the minimal layout. */
@@ -208,56 +219,47 @@
 		/* keep record size divisible with maximum alignment */
 		offset += max_align - (offset % max_align);
 	}
-
-	/* create a new mapping without records. a bit kludgy. */
-	old_records_count = map->records_count;
-	map->records_count = 0;
-	new_map = mail_index_map_clone(map, offset);
-	map->records_count = old_records_count;
+	new_record_size = offset;
 
-	if (old_size > ext[ext_id].record_size) {
-		/* we are shrinking the record */
-		old_size = ext[ext_id].record_size;
-	}
-
-	/* now copy the records to new mapping */
+	/* copy the records to new buffer */
+	new_buffer_size = map->records_count * new_record_size;
+	new_buffer = buffer_create_dynamic(default_pool, new_buffer_size);
 	src = map->records;
 	offset = 0;
-	for (rec_idx = 0; rec_idx < old_records_count; rec_idx++) {
-		buffer_write(new_map->buffer, offset, src,
+	for (rec_idx = 0; rec_idx < map->records_count; rec_idx++) {
+		/* write the base record */
+		buffer_write(new_buffer, offset, src,
 			     sizeof(struct mail_index_record));
+
+		/* write extensions */
 		for (i = 0; i < count; i++) {
-			buffer_write(new_map->buffer,
-				     offset + ext[i].record_offset,
+			buffer_write(new_buffer, offset + ext[i].record_offset,
 				     CONST_PTR_OFFSET(src, old_offsets[i]),
-				     i == ext_id ? old_size :
-				     ext[i].record_size);
+				     copy_sizes[i]);
 		}
 		src = CONST_PTR_OFFSET(src, map->hdr.record_size);
-		offset += new_map->hdr.record_size;
+		offset += new_record_size;
 	}
 
-	if (new_map->buffer->used !=
-	    old_records_count * new_map->hdr.record_size) {
+	if (new_buffer->used != new_buffer_size) {
 		/* we didn't fully write the last record */
-		size_t space = old_records_count * new_map->hdr.record_size -
-			new_map->buffer->used;
-		i_assert(space < new_map->hdr.record_size);
-		buffer_append_zero(new_map->buffer, space);
+		size_t space = new_buffer_size - new_buffer->used;
+		i_assert(space < new_record_size);
+		buffer_append_zero(new_buffer, space);
 	}
 
-	new_map->records = buffer_get_modifiable_data(new_map->buffer, NULL);
-	new_map->records_count = old_records_count;
-	i_assert(new_map->records_count == new_map->hdr.messages_count);
+	buffer_free(map->buffer);
+	map->buffer = new_buffer;
+	map->records = buffer_get_modifiable_data(map->buffer, NULL);
+	map->hdr.record_size = new_record_size;
 
 	/* update record offsets in headers */
 	for (i = 0; i < count; i++) {
-                ext_hdr = get_ext_header(new_map, &ext[i]);
+                ext_hdr = get_ext_header(map, &ext[i]);
 		ext_hdr->record_offset = ext[i].record_offset;
 	}
 
 	t_pop();
-	return new_map;
 }
 
 static void
@@ -300,6 +302,7 @@
 		i_assert((map->hdr_copy_buf->used % sizeof(uint64_t)) == 0);
 		map->hdr_base = map->hdr_copy_buf->data;
 		map->hdr.header_size = map->hdr_copy_buf->used;
+		map->write_base_header = map->write_ext_header = TRUE;
 
 		ext_hdr = get_ext_header(map, ext);
 		ext_hdr->reset_id = ext->reset_id;
@@ -320,12 +323,11 @@
 	}
 
 	if (old_record_size != u->record_size) {
-		map = sync_ext_reorder(map, ext_id, old_record_size);
-		mail_index_sync_replace_map(ctx, map);
+		map = mail_index_sync_get_atomic_map(ctx);
+		sync_ext_reorder(map, ext_id, old_record_size);
 	} else if (modified) {
 		/* header size changed. recreate index file. */
-		map = mail_index_map_clone(map, map->hdr.record_size);
-		mail_index_sync_replace_map(ctx, map);
+		map = mail_index_sync_get_atomic_map(ctx);
 	}
 }
 
@@ -381,17 +383,10 @@
 		return 1;
 	}
 
-	if (map->refcount != 1) {
-		/* below we'll first add the extension to the mapping, and then
-		   call sync_ext_reorder() which clones the map. that however
-		   leaves this mapping with the new extension, but without
-		   a resized record_size. if the mapping is still used
-		   elsewhere, it will create problems. so here we'll just make
-		   sure that the partially updated mapping will get destroyed
-		   once the resize is complete. */
-		map = mail_index_map_clone(map, map->hdr.record_size);
-		mail_index_sync_replace_map(ctx, map);
-	}
+	/* be sure to get a unique mapping before we modify the extensions,
+	   otherwise other map users will see the new extension but not the
+	   data records that sync_ext_reorder() adds. */
+	map = mail_index_sync_get_atomic_map(ctx);
 
 	hdr_buf = map->hdr_copy_buf;
 	if (MAIL_INDEX_HEADER_SIZE_ALIGN(hdr_buf->used) != hdr_buf->used) {
@@ -403,7 +398,7 @@
 
 	/* register record offset initially using zero,
 	   sync_ext_reorder() will fix it. */
-	hdr_offset = map->hdr_copy_buf->used + sizeof(ext_hdr) + strlen(name);
+	hdr_offset = hdr_buf->used + sizeof(ext_hdr) + strlen(name);
 	hdr_offset = MAIL_INDEX_HEADER_SIZE_ALIGN(hdr_offset);
 	ext_id = mail_index_map_register_ext(ctx->view->index, map, name,
 					     hdr_offset, u->hdr_size, 0,
@@ -431,14 +426,12 @@
 	i_assert((hdr_buf->used % sizeof(uint64_t)) == 0);
 
 	map->hdr.header_size = hdr_buf->used;
-	map->hdr_base = map->hdr_copy_buf->data;
+	map->hdr_base = hdr_buf->data;
 
 	t_pop();
 
         mail_index_sync_init_handlers(ctx);
-
-	map = sync_ext_reorder(map, ext_id, 0);
-	mail_index_sync_replace_map(ctx, map);
+	sync_ext_reorder(map, ext_id, 0);
 
 	ctx->cur_ext_ignore = FALSE;
 	ctx->cur_ext_id = ext_id;
@@ -463,12 +456,9 @@
 	if (ctx->cur_ext_ignore)
 		return 1;
 
-	if (!map->write_to_disk || map->refcount != 1) {
-		/* a new index file will be created, so the old data won't be
-		   accidentally used by other processes. */
-		map = mail_index_map_clone(map, map->hdr.record_size);
-		mail_index_sync_replace_map(ctx, map);
-	}
+	/* a new index file will be created, so the old data won't be
+	   accidentally used by other processes. */
+	map = mail_index_sync_get_atomic_map(ctx);
 
 	ext = array_idx_modifiable(&map->extensions, ctx->cur_ext_id);
 	ext->reset_id = u->new_reset_id;
@@ -510,6 +500,8 @@
 	buffer_write(map->hdr_copy_buf, ext->hdr_offset + u->offset,
 		     u + 1, u->size);
 	map->hdr_base = map->hdr_copy_buf->data;
+
+	map->write_ext_header = TRUE;
 	return 1;
 }
 
@@ -533,6 +525,7 @@
 
 	if (seq == 0)
 		return 1;
+	mail_index_sync_move_to_private(ctx);
 
 	ext = array_idx(&view->map->extensions, ctx->cur_ext_id);
 
@@ -553,10 +546,7 @@
 			return ret;
 	}
 
-	if (view->map->write_seq_first == 0 || view->map->write_seq_first > seq)
-		view->map->write_seq_first = seq;
-	if (view->map->write_seq_last < seq)
-                view->map->write_seq_last = seq;
+	mail_index_sync_write_seq_update(ctx, seq, seq);
 
 	/* @UNSAFE */
 	memcpy(old_data, u + 1, ext->record_size);
--- a/src/lib-index/mail-index-sync-keywords.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-sync-keywords.c	Mon Jun 11 14:50:10 2007 +0300
@@ -120,14 +120,10 @@
 	unsigned int keywords_count;
 	int ret;
 
-	if (!map->write_to_disk) {
-		/* if we crash in the middle of writing the header, the
-		   keywords are more or less corrupted. avoid that by
-		   making sure the header is updated atomically. */
-		map = mail_index_map_clone(map, map->hdr.record_size);
-		mail_index_sync_replace_map(ctx, map);
-	}
-	i_assert(MAIL_INDEX_MAP_IS_IN_MEMORY(map));
+	/* if we crash in the middle of writing the header, the
+	   keywords are more or less corrupted. avoid that by
+	   making sure the header is updated atomically. */
+	map = mail_index_sync_get_atomic_map(ctx);
 
 	ext_id = mail_index_map_lookup_ext(map, "keywords");
 	if (ext_id != (uint32_t)-1) {
@@ -199,12 +195,12 @@
 }
 
 static int
-keywords_update_records(struct mail_index_view *view,
+keywords_update_records(struct mail_index_sync_map_ctx *ctx,
 			const struct mail_index_ext *ext,
-			unsigned int keyword_idx,
-			enum modify_type type,
+			unsigned int keyword_idx, enum modify_type type,
 			uint32_t uid1, uint32_t uid2)
 {
+	struct mail_index_view *view = ctx->view;
 	struct mail_index_record *rec;
 	unsigned char *data, data_mask;
 	unsigned int data_offset;
@@ -218,11 +214,8 @@
 	if (seq1 == 0)
 		return 1;
 
-	if (view->map->write_seq_first == 0 ||
-	    view->map->write_seq_first > seq1)
-		view->map->write_seq_first = seq1;
-	if (view->map->write_seq_last < seq2)
-		view->map->write_seq_last = seq2;
+	mail_index_sync_move_to_private(ctx);
+	mail_index_sync_write_seq_update(ctx, seq1, seq2);
 
 	data_offset = keyword_idx / CHAR_BIT;
 	data_mask = 1 << (keyword_idx % CHAR_BIT);
@@ -320,7 +313,7 @@
 			return -1;
 		}
 
-		ret = keywords_update_records(ctx->view, ext, keyword_idx,
+		ret = keywords_update_records(ctx, ext, keyword_idx,
 					      rec->modify_type,
 					      uid[0], uid[1]);
 		if (ret <= 0)
@@ -358,11 +351,8 @@
 		if (seq1 == 0)
 			continue;
 
-		if (map->write_seq_first == 0 || map->write_seq_first > seq1)
-			map->write_seq_first = seq1;
-		if (map->write_seq_last < seq2)
-			map->write_seq_last = seq2;
-
+		mail_index_sync_move_to_private(ctx);
+		mail_index_sync_write_seq_update(ctx, seq1, seq2);
 		for (seq1--; seq1 < seq2; seq1++) {
 			rec = MAIL_INDEX_MAP_IDX(map, seq1);
 			memset(PTR_OFFSET(rec, ext->record_offset),
--- a/src/lib-index/mail-index-sync-private.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-sync-private.h	Mon Jun 11 14:50:10 2007 +0300
@@ -8,26 +8,6 @@
 };
 ARRAY_DEFINE_TYPE(uid_range, struct uid_range);
 
-struct mail_index_sync_ctx {
-	struct mail_index *index;
-	struct mail_index_view *view;
-	struct mail_index_transaction *sync_trans, *ext_trans;
-
-	const struct mail_transaction_header *hdr;
-	const void *data;
-
-	ARRAY_DEFINE(sync_list, struct mail_index_sync_list);
-	uint32_t next_uid;
-
-	uint32_t append_uid_first, append_uid_last;
-
-	unsigned int lock_id;
-
-	unsigned int sync_appends:1;
-	unsigned int sync_recent:1;
-	unsigned int sync_dirty:1;
-};
-
 struct mail_index_sync_list {
 	const ARRAY_TYPE(uid_range) *array;
 	unsigned int idx;
@@ -69,15 +49,18 @@
 			      struct mail_index_view *view,
 			      enum mail_index_sync_handler_type type);
 void mail_index_sync_map_deinit(struct mail_index_sync_map_ctx *sync_map_ctx);
-int mail_index_sync_update_index(struct mail_index_sync_ctx *sync_ctx,
-				 bool sync_only_external);
+int mail_index_sync_map(struct mail_index *index, struct mail_index_map **map,
+			enum mail_index_sync_handler_type type, bool force);
 
 int mail_index_sync_record(struct mail_index_sync_map_ctx *ctx,
 			   const struct mail_transaction_header *hdr,
 			   const void *data);
 
-void mail_index_sync_replace_map(struct mail_index_sync_map_ctx *ctx,
-				 struct mail_index_map *map);
+void mail_index_sync_move_to_private(struct mail_index_sync_map_ctx *ctx);
+struct mail_index_map *
+mail_index_sync_get_atomic_map(struct mail_index_sync_map_ctx *ctx);
+void mail_index_sync_write_seq_update(struct mail_index_sync_map_ctx *ctx,
+				      uint32_t seq1, uint32_t seq2);
 
 void mail_index_sync_init_expunge_handlers(struct mail_index_sync_map_ctx *ctx);
 void
--- a/src/lib-index/mail-index-sync-update.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-sync-update.c	Mon Jun 11 14:50:10 2007 +0300
@@ -3,8 +3,6 @@
 #include "lib.h"
 #include "ioloop.h"
 #include "array.h"
-#include "buffer.h"
-#include "file-set-size.h"
 #include "mmap-util.h"
 #include "mail-index-view-private.h"
 #include "mail-index-sync-private.h"
@@ -35,94 +33,99 @@
 		prev_offset = ctx->ext_intro_offset;
 	}
 
-	if (!ctx->sync_only_external) {
-		i_assert(prev_offset >= map->hdr.log_file_int_offset ||
+	if (!ctx->sync_only_external) { // FIXME: never happens
+		i_assert(prev_offset >= map->hdr.log_file_index_int_offset ||
 			 prev_seq > map->hdr.log_file_seq);
-		map->hdr.log_file_int_offset = prev_offset;
+		map->hdr.log_file_index_int_offset = prev_offset;
 	} else if (map->hdr.log_file_seq != prev_seq) {
 		/* log sequence changed. update internal offset to
 		   beginning of the new file. */
-		i_assert(map->hdr.log_file_int_offset ==
+		i_assert(map->hdr.log_file_index_int_offset ==
 			 ctx->view->index->log->head->hdr.prev_file_offset);
-		map->hdr.log_file_int_offset =
+		map->hdr.log_file_index_int_offset =
 			ctx->view->index->log->head->hdr.hdr_size;
 	}
 
 	/* we might be in the middle of syncing internal transactions, with
 	   some of the following external transactions already synced. */
 	i_assert(prev_seq > map->hdr.log_file_seq ||
-		 prev_offset >= map->hdr.log_file_ext_offset ||
+		 prev_offset >= map->hdr.log_file_index_ext_offset ||
 		 (!eol && !ctx->sync_only_external));
 	if (map->hdr.log_file_seq != prev_seq ||
-	    prev_offset > map->hdr.log_file_ext_offset) {
+	    prev_offset > map->hdr.log_file_index_ext_offset) {
 		map->hdr.log_file_seq = prev_seq;
-		map->hdr.log_file_ext_offset = prev_offset;
+		map->hdr.log_file_index_ext_offset = prev_offset;
 	}
 }
 
+#if 0 // FIXME: can we / do we want to support this?
 static int
 mail_index_map_msync(struct mail_index *index, struct mail_index_map *map)
 {
-	unsigned int base_size;
-
-	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
+	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
+		buffer_write(map->hdr_copy_buf, 0, &map->hdr, sizeof(map->hdr));
 		return 0;
+	}
 
-	base_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
-	map->mmap_used_size = index->hdr->header_size +
+	map->mmap_used_size = map->hdr.header_size +
 		map->records_count * map->hdr.record_size;
 
-	memcpy(map->mmap_base, &map->hdr, base_size);
-	memcpy(PTR_OFFSET(map->mmap_base, base_size),
-	       CONST_PTR_OFFSET(map->hdr_base, base_size),
-	       map->hdr.header_size - base_size);
+	memcpy(map->mmap_base, &map->hdr,
+	       I_MIN(map->hdr.base_header_size, sizeof(map->hdr)));
+	memcpy(PTR_OFFSET(map->mmap_base, map->hdr.base_header_size),
+	       CONST_PTR_OFFSET(map->hdr_base, map->hdr.base_header_size),
+	       map->hdr.header_size - map->hdr.base_header_size);
 	if (msync(map->mmap_base, map->mmap_used_size, MS_SYNC) < 0) {
 		mail_index_set_syscall_error(index, "msync()");
 		return -1;
 	}
 	return 0;
 }
+#endif
 
-void mail_index_sync_replace_map(struct mail_index_sync_map_ctx *ctx,
-				 struct mail_index_map *map)
+static void mail_index_sync_replace_map(struct mail_index_sync_map_ctx *ctx,
+					struct mail_index_map *map)
 {
         struct mail_index_view *view = ctx->view;
-	struct mail_index_map *old_map = view->map;
 
 	i_assert(view->map != map);
 
-	old_map->refcount++;
-
-	/* if map still exists after this, it's only in views. */
-	view->map->write_to_disk = FALSE;
-	/* keywords aren't parsed for the new map yet */
-	view->map->keywords_read = FALSE;
+#if 0 // FIXME
+	/* we could have already updated some of the records, so make sure
+	   that other views (in possibly other processes) will see this map's
+	   header in a valid state.  */
+	mail_index_sync_update_log_offset(ctx, view->map, FALSE);
+	(void)mail_index_map_msync(view->index, view->map);
+#endif
 
 	mail_index_unmap(view->index, &view->map);
 	view->map = map;
 
-	if ((ctx->type & (MAIL_INDEX_SYNC_HANDLER_FILE |
-			  MAIL_INDEX_SYNC_HANDLER_HEAD)) != 0 &&
-	    view->index->map != map) {
-		mail_index_unmap(view->index, &view->index->map);
+	if (ctx->type != MAIL_INDEX_SYNC_HANDLER_VIEW) {
 		view->index->map = map;
 		view->index->hdr = &map->hdr;
-		map->refcount++;
+	}
+}
 
-		if (ctx->type == MAIL_INDEX_SYNC_HANDLER_FILE) {
-			map->write_to_disk = TRUE;
-			map->write_atomic = TRUE;
-		}
-	}
+void mail_index_sync_move_to_private(struct mail_index_sync_map_ctx *ctx)
+{
+	struct mail_index_map *map = ctx->view->map;
 
-	/* some views may still use the same mapping, and since we could have
-	   already updated the records, make sure we leave the header in a
-	   valid state as well */
-	mail_index_sync_update_log_offset(ctx, old_map, FALSE);
-	(void)mail_index_map_msync(view->index, old_map);
-	mail_index_unmap(view->index, &old_map);
+	if (map->refcount != 1) {
+		/* map has other references: switch to a clone of it */
+		mail_index_sync_replace_map(ctx, mail_index_map_clone(map));
+	} else if (!MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
+		/* sole reference: move the existing map to memory */
+		mail_index_map_move_to_memory(map);
+	}
+}
+}
 
-	i_assert(view->hdr.messages_count == map->hdr.messages_count);
+struct mail_index_map *
+mail_index_sync_get_atomic_map(struct mail_index_sync_map_ctx *ctx)
+{
+	mail_index_sync_move_to_private(ctx); /* may replace ctx->view->map */
+	ctx->view->map->write_atomic = TRUE; /* flag map for atomic writing */
+	return ctx->view->map;
 }
 
 static int
@@ -191,6 +194,22 @@
 	return 0;
 }
 
+/* update header counters for a flags change, marking them broken on error */
+static void
+mail_index_sync_header_update_counts(struct mail_index_sync_map_ctx *ctx,
+				     uint8_t old_flags, uint8_t new_flags)
+{
+	struct mail_index_view *view = ctx->view;
+	const char *error;
+
+	if (!view->broken_counters &&
+	    mail_index_header_update_counts(&view->map->hdr, old_flags,
+					    new_flags, &error) < 0) {
+		mail_index_sync_set_corrupted(ctx, "%s", error);
+		view->broken_counters = TRUE;
+	}
+}
+
 void mail_index_view_recalc_counters(struct mail_index_view *view)
 {
 	struct mail_index_map *map = view->map;
@@ -231,105 +250,109 @@
 		hdr->first_deleted_uid_lowwater = rec->uid;
 }
 
-static int sync_expunge(const struct mail_transaction_expunge *e,
-			struct mail_index_sync_map_ctx *ctx)
+/* call all expunge handlers for records seq1..seq2; returns -1 on failure */
+static int sync_expunge_call_handlers(struct mail_index_sync_map_ctx *ctx,
+				      uint32_t seq1, uint32_t seq2)
 {
-	struct mail_index_view *view = ctx->view;
-	struct mail_index_map *map = view->map;
+	const struct mail_index_expunge_handler *eh;
 	struct mail_index_record *rec;
-	const char *error;
-	uint32_t count, seq, seq1, seq2;
-        const struct mail_index_expunge_handler *expunge_handlers, *eh;
-	unsigned int i, expunge_handlers_count;
-
-	if (e->uid1 > e->uid2 || e->uid1 == 0) {
-		mail_index_sync_set_corrupted(ctx,
-				"Invalid UID range in expunge (%u .. %u)",
-				e->uid1, e->uid2);
-		return -1;
-	}
-
-	if (!view->map->write_to_disk || view->map->refcount != 1) {
-		/* expunges have to be atomic. so we'll have to copy
-		   the mapping, do the changes there and then finally
-		   replace the whole index file. to avoid extra disk
-		   I/O we copy the index into memory rather than to
-		   temporary file */
-		map = mail_index_map_clone(map, map->hdr.record_size);
-		mail_index_sync_replace_map(ctx, map);
-	}
-	i_assert(MAIL_INDEX_MAP_IS_IN_MEMORY(map));
-
-	/* we want atomic rename()ing */
-	map->write_atomic = TRUE;
-
-	if (mail_index_lookup_uid_range(view, e->uid1, e->uid2,
-					&seq1, &seq2) < 0)
-		return -1;
-
-	if (seq1 == 0)
-		return 1;
+	unsigned int i, count;
+	uint32_t seq;
 
 	/* call expunge handlers only when syncing index file */
-	if (ctx->type == MAIL_INDEX_SYNC_HANDLER_FILE &&
-	    !ctx->expunge_handlers_set)
+	if (ctx->type != MAIL_INDEX_SYNC_HANDLER_FILE)
+		return 0;
+	if (!ctx->expunge_handlers_set)
 		mail_index_sync_init_expunge_handlers(ctx);
 
-	if (ctx->type == MAIL_INDEX_SYNC_HANDLER_FILE &&
-	    array_is_created(&ctx->expunge_handlers)) {
-		expunge_handlers = array_get(&ctx->expunge_handlers,
-					     &expunge_handlers_count);
-	} else {
-		expunge_handlers = NULL;
-		expunge_handlers_count = 0;
-	}
+	if (!array_is_created(&ctx->expunge_handlers))
+		return 0;
 
-	if (ctx->unreliable_flags || view->broken_counters)
-		view->broken_counters = TRUE;
-	else {
-		for (seq = seq1; seq <= seq2; seq++) {
-			rec = MAIL_INDEX_MAP_IDX(map, seq-1);
-			if (mail_index_header_update_counts(&map->hdr,
-							    rec->flags, 0,
-							    &error) < 0) {
-				mail_index_sync_set_corrupted(ctx, "%s", error);
-				return -1;
-			}
-		}
-	}
-
-	for (i = 0; i < expunge_handlers_count; i++) {
-		eh = &expunge_handlers[i];
-
-		for (seq = seq1; seq <= seq2; seq++) {
-			rec = MAIL_INDEX_MAP_IDX(map, seq-1);
-			if (eh->handler(ctx, seq,
+	eh = array_get(&ctx->expunge_handlers, &count);
+	for (i = 0; i < count; i++, eh++) {
+		for (seq = seq1; seq <= seq2; seq++) { /* restart per handler */
+			rec = MAIL_INDEX_MAP_IDX(ctx->view->map, seq-1);
+			if (eh->handler(ctx, seq,
 					PTR_OFFSET(rec, eh->record_offset),
 					eh->sync_context, eh->context) < 0)
 				return -1;
 		}
 	}
+	return 0;
+}
 
-	/* @UNSAFE */
-	count = seq2 - seq1 + 1;
-	memmove(MAIL_INDEX_MAP_IDX(map, seq1-1), MAIL_INDEX_MAP_IDX(map, seq2),
-		(map->records_count - seq2) * map->hdr.record_size);
+static int
+sync_expunge(const struct mail_transaction_expunge *e, unsigned int count,
+	     struct mail_index_sync_map_ctx *ctx)
+{
+	struct mail_index_map *map = ctx->view->map;
+	struct mail_index_record *rec;
+	uint32_t seq_count, seq, seq1, seq2;
+	unsigned int i;
+
+	/* we don't ever want to move around data inside a memory mapped file.
+	   it gets corrupted too easily if we crash in the middle. */
+	// FIXME: it's necessary for current view code that we get atomic
+	// map even if these messages are already expunged, because the
+	// view code doesn't check that and our index_int_offset goes wrong
+	map = mail_index_sync_get_atomic_map(ctx);
+
+	for (i = 0; i < count; i++, e++) {
+		if (e->uid1 > e->uid2 || e->uid1 == 0) {
+			mail_index_sync_set_corrupted(ctx,
+				"Invalid UID range in expunge (%u .. %u)",
+				e->uid1, e->uid2);
+			return -1;
+		}
+		if (i > 0 && e->uid1 <= e[-1].uid2) {
+			mail_index_sync_set_corrupted(ctx,
+				"Non-sorted UID ranges in expunge");
+			return -1;
+		}
 
-	map->records_count -= count;
-	map->hdr.messages_count -= count;
-	view->hdr.messages_count -= count;
+		if (mail_index_lookup_uid_range(ctx->view, e->uid1, e->uid2,
+						&seq1, &seq2) < 0)
+			return -1;
+		if (seq1 == 0) {
+			/* everything expunged already */
+			continue;
+		}
 
-	if (map->buffer != NULL) {
-		buffer_set_used_size(map->buffer, map->records_count *
-				     map->hdr.record_size);
-		map->records = buffer_get_modifiable_data(map->buffer, NULL);
+		if (ctx->unreliable_flags || ctx->view->broken_counters)
+			ctx->view->broken_counters = TRUE;
+		else {
+			for (seq = seq1; seq <= seq2; seq++) {
+				rec = MAIL_INDEX_MAP_IDX(map, seq-1);
+				mail_index_sync_header_update_counts(ctx,
+								rec->flags, 0);
+			}
+		}
+
+		if (sync_expunge_call_handlers(ctx, seq1, seq2) < 0)
+			return -1;
+
+		/* @UNSAFE */
+		memmove(MAIL_INDEX_MAP_IDX(map, seq1-1),
+			MAIL_INDEX_MAP_IDX(map, seq2),
+			(map->records_count - seq2) * map->hdr.record_size);
+
+		seq_count = seq2 - seq1 + 1;
+		map->records_count -= seq_count;
+		map->hdr.messages_count -= seq_count;
+
+		/* lookup_uid_range() relies on this */
+		ctx->view->hdr.messages_count -= seq_count;
 	}
 	return 1;
 }
 
-static void write_seq_update(struct mail_index_map *map,
-			     uint32_t seq1, uint32_t seq2)
+void mail_index_sync_write_seq_update(struct mail_index_sync_map_ctx *ctx,
+				      uint32_t seq1, uint32_t seq2)
 {
+	struct mail_index_map *map = ctx->view->map;
+
+	i_assert(MAIL_INDEX_MAP_IS_IN_MEMORY(map));
+
 	if (map->write_seq_first == 0 ||
 	    map->write_seq_first > seq1)
 		map->write_seq_first = seq1;
@@ -342,8 +365,8 @@
 {
 	struct mail_index_view *view = ctx->view;
 	struct mail_index_map *map = view->map;
-	const char *error;
 	void *dest;
+	size_t append_pos;
 
 	if (rec->uid < map->hdr.next_uid) {
 		mail_index_sync_set_corrupted(ctx,
@@ -352,17 +375,19 @@
 		return -1;
 	}
 
-	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map)) {
-		i_assert(map->records_count * map->hdr.record_size ==
-			 buffer_get_used_size(map->buffer));
-		dest = buffer_append_space_unsafe(map->buffer,
-						  map->hdr.record_size);
-		map->records = buffer_get_modifiable_data(map->buffer, NULL);
-	} else {
-		i_assert((map->records_count+1) * map->hdr.record_size <=
-			 map->mmap_size);
-		dest = MAIL_INDEX_MAP_IDX(map, map->records_count);
-	}
+	/* move to memory. the mapping is written when unlocking so we don't
+	   waste time re-mmap()ing multiple times or waste space growing index
+	   file too large */
+	mail_index_sync_move_to_private(ctx);
+	map = view->map;
+
+	/* don't rely on buffer->used being at the correct position.
+	   at least expunges can move it */
+	append_pos = map->records_count * map->hdr.record_size;
+	dest = buffer_get_space_unsafe(map->buffer, append_pos,
+				       map->hdr.record_size);
+	map->records = buffer_get_modifiable_data(map->buffer, NULL);
+
 	memcpy(dest, rec, sizeof(*rec));
 	memset(PTR_OFFSET(dest, sizeof(*rec)), 0,
 	       map->hdr.record_size - sizeof(*rec));
@@ -370,22 +395,16 @@
 	map->hdr.messages_count++;
 	map->hdr.next_uid = rec->uid+1;
 	map->records_count++;
-	view->hdr.messages_count++;
-	view->hdr.next_uid = rec->uid+1;
 
-	write_seq_update(map, map->hdr.messages_count, map->hdr.messages_count);
+	mail_index_sync_write_seq_update(ctx, map->hdr.messages_count,
+					 map->hdr.messages_count);
+	map->write_base_header = TRUE;
 
 	if ((rec->flags & MAIL_INDEX_MAIL_FLAG_DIRTY) != 0)
 		map->hdr.flags |= MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
 
 	mail_index_header_update_lowwaters(&map->hdr, rec);
-	if (!view->broken_counters) {
-		if (mail_index_header_update_counts(&map->hdr, 0, rec->flags,
-						    &error) < 0) {
-			mail_index_sync_set_corrupted(ctx, "%s", error);
-			return -1;
-		}
-	}
+	mail_index_sync_header_update_counts(ctx, 0, rec->flags);
 	return 1;
 }
 
@@ -395,7 +414,6 @@
 	struct mail_index_view *view = ctx->view;
 	struct mail_index_header *hdr;
 	struct mail_index_record *rec;
-	const char *error;
 	uint8_t flag_mask, old_flags;
 	uint32_t idx, seq1, seq2;
 
@@ -413,7 +431,9 @@
 	if (seq1 == 0)
 		return 1;
 
-	write_seq_update(view->map, seq1, seq2);
+	mail_index_sync_move_to_private(ctx);
+	mail_index_sync_write_seq_update(ctx, seq1, seq2);
+	view->map->write_base_header = TRUE;
 
 	hdr = &view->map->hdr;
 	if ((u->add_flags & MAIL_INDEX_MAIL_FLAG_DIRTY) != 0)
@@ -444,12 +464,8 @@
 			rec->flags = (rec->flags & flag_mask) | u->add_flags;
 
 			mail_index_header_update_lowwaters(hdr, rec);
-			if (mail_index_header_update_counts(hdr, old_flags,
-							    rec->flags,
-							    &error) < 0) {
-				mail_index_sync_set_corrupted(ctx, "%s", error);
-				return -1;
-			}
+			mail_index_sync_header_update_counts(ctx, old_flags,
+							     rec->flags);
 		}
 	}
 	return 1;
@@ -470,6 +486,7 @@
 
 	buffer_write(map->hdr_copy_buf, u->offset, u + 1, u->size);
 	map->hdr_base = map->hdr_copy_buf->data;
+	map->write_base_header = TRUE;
 
 	/* @UNSAFE */
 	if ((uint32_t)(u->offset + u->size) <= sizeof(map->hdr)) {
@@ -482,70 +499,10 @@
 	return 1;
 }
 
-static int mail_index_grow(struct mail_index *index, struct mail_index_map *map,
-			   unsigned int count)
-{
-	void *hdr_copy;
-	size_t size, hdr_copy_size;
-
-	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
-		return 1;
-
-	i_assert(map == index->map);
-	i_assert(!index->mapping); /* mail_index_sync_from_transactions() */
-
-	size = map->hdr.header_size +
-		(map->records_count + count) * map->hdr.record_size;
-	if (size <= map->mmap_size)
-		return 1;
-
-	/* when we grow fast, do it exponentially */
-	if (count < index->last_grow_count)
-		count = index->last_grow_count;
-	if (count < MAIL_INDEX_MAX_POWER_GROW)
-		count = nearest_power(count);
-	index->last_grow_count = count;
-
-	size = map->hdr.header_size +
-		(map->records_count + count) * map->hdr.record_size;
-	if (file_set_size(index->fd, (off_t)size) < 0) {
-		mail_index_set_syscall_error(index, "file_set_size()");
-		return !ENOSPACE(errno) ? -1 :
-			mail_index_move_to_memory(index);
-	}
-
-	/* we only wish to grow the file, but mail_index_map() updates the
-	   headers as well and may break our modified hdr_copy. so, take
-	   a backup of it and put it back afterwards */
-	t_push();
-	i_assert(map->hdr_copy_buf->used == map->hdr.header_size);
-        hdr_copy_size = map->hdr_copy_buf->used;
-	hdr_copy = t_malloc(hdr_copy_size);
-	memcpy(hdr_copy, map->hdr_copy_buf->data, hdr_copy_size);
-	memcpy(hdr_copy, &map->hdr, sizeof(map->hdr));
-
-	if (mail_index_map(index, TRUE) <= 0) {
-		t_pop();
-		return -1;
-	}
-
-	map = index->map;
-	buffer_reset(map->hdr_copy_buf);
-	buffer_append(map->hdr_copy_buf, hdr_copy, hdr_copy_size);
-
-	map->hdr_base = map->hdr_copy_buf->data;
-	memcpy(&map->hdr, hdr_copy, sizeof(map->hdr));
-	map->records_count = map->hdr.messages_count;
-
-	i_assert(map->mmap_size >= size);
-
-	t_pop();
-	return 1;
-}
-
 static void
 mail_index_update_day_headers(struct mail_index_header *hdr, uint32_t uid)
 {
+	// FIXME: move as header updates to transaction committing
 	const int max_days =
 		sizeof(hdr->day_first_uid) / sizeof(hdr->day_first_uid[0]);
 	struct tm tm;
@@ -601,14 +558,10 @@
 	}
 	case MAIL_TRANSACTION_EXPUNGE:
 	case MAIL_TRANSACTION_EXPUNGE|MAIL_TRANSACTION_EXPUNGE_PROT: {
-		const struct mail_transaction_expunge *rec, *end;
+		const struct mail_transaction_expunge *rec = data, *end;
 
 		end = CONST_PTR_OFFSET(data, hdr->size);
-		for (rec = data; rec < end; rec++) {
-			ret = sync_expunge(rec, ctx);
-			if (ret <= 0)
-				break;
-		}
+		ret = sync_expunge(rec, end - rec, ctx);
 		break;
 	}
 	case MAIL_TRANSACTION_FLAG_UPDATE: {
@@ -740,8 +693,7 @@
 
 	i_assert(ctx->view->map->records_count ==
 		 ctx->view->map->hdr.messages_count);
-	i_assert(ctx->view->hdr.messages_count ==
-		 ctx->view->map->hdr.messages_count);
+	ctx->view->hdr = ctx->view->map->hdr;
 	return ret;
 }
 
@@ -767,9 +719,9 @@
 	mail_index_sync_deinit_handlers(sync_map_ctx);
 }
 
-static void mail_index_sync_remove_recent(struct mail_index_sync_ctx *sync_ctx)
+static void mail_index_sync_remove_recent(struct mail_index_sync_map_ctx *ctx)
 {
-	struct mail_index_map *map = sync_ctx->view->map;
+	struct mail_index_map *map = ctx->view->map;
 	struct mail_index_record *rec;
 	unsigned int i;
 
@@ -778,167 +730,192 @@
 		if ((rec->flags & MAIL_RECENT) != 0) {
 			rec->flags &= ~MAIL_RECENT;
 
-			write_seq_update(map, i + 1, i + 1);
+			mail_index_sync_write_seq_update(ctx, i + 1, i + 1);
 		}
 	}
 
 	map->hdr.recent_messages_count = 0;
 	map->hdr.first_recent_uid_lowwater = map->hdr.next_uid;
+	map->write_base_header = TRUE;
 }
 
-static void log_view_seek_back(struct mail_transaction_log_view *log_view)
+/* set HAVE_DIRTY in the header if any record still has the dirty flag */
+static void mail_index_sync_update_hdr_dirty_flag(struct mail_index_map *map)
 {
-	uint32_t prev_seq;
-	uoff_t prev_offset;
+	const struct mail_index_record *rec;
+	unsigned int idx;
 
-	mail_transaction_log_view_get_prev_pos(log_view, &prev_seq,
-					       &prev_offset);
-	mail_transaction_log_view_seek(log_view, prev_seq, prev_offset);
+	if ((map->hdr.flags & MAIL_INDEX_HDR_FLAG_HAVE_DIRTY) != 0)
+		return;
+
+	for (idx = 0; idx < map->records_count; idx++) {
+		rec = MAIL_INDEX_MAP_IDX(map, idx);
+		if ((rec->flags & MAIL_INDEX_MAIL_FLAG_DIRTY) == 0)
+			continue;
+		map->hdr.flags |= MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
+		return;
+	}
 }
 
-int mail_index_sync_update_index(struct mail_index_sync_ctx *sync_ctx,
-				 bool sync_only_external)
+int mail_index_sync_map(struct mail_index *index, struct mail_index_map **_map,
+			enum mail_index_sync_handler_type type, bool force)
 {
-	struct mail_index *index = sync_ctx->index;
-	struct mail_index_view *view = sync_ctx->view;
-	struct mail_index_map *map;
-        struct mail_index_sync_map_ctx sync_map_ctx;
+	struct mail_index_map *map = *_map;
+	struct mail_index_view *view;
+	struct mail_index_sync_map_ctx sync_map_ctx;
 	const struct mail_transaction_header *thdr;
-	const void *data;
-	unsigned int count, old_lock_id;
-	uint32_t i, first_append_uid;
+	const void *tdata;
+	uint32_t prev_seq, mailbox_sync_seq, expunge_seq;
+	uoff_t prev_offset, mailbox_sync_offset, expunge_offset;
 	int ret;
-	bool had_dirty, skipped, check_ext_offsets;
+	bool had_dirty;
+
+	if (!force) {
+		/* see if we'd prefer to reopen the index file instead of
+		   syncing the current map from the transaction log */
+		uoff_t log_size, index_size;
+
+		if (index->log->head == NULL || index->fd == -1)
+			return 0;
+
+		index_size = map->hdr.header_size +
+			map->records_count * map->hdr.record_size;
 
-	mail_index_sync_map_init(&sync_map_ctx, view,
-				 MAIL_INDEX_SYNC_HANDLER_FILE);
-	sync_map_ctx.sync_only_external = sync_only_external;
+		/* this isn't necessarily correct currently, but it should be
+		   close enough */
+		log_size = index->log->head->last_size;
+		if (log_size > map->hdr.log_file_index_int_offset &&
+		    log_size - map->hdr.log_file_index_int_offset > index_size)
+			return 0;
+	}
 
-	/* we'll have to update view->lock_id to avoid mail_index_view_lock()
-	   trying to update the file later. */
-	old_lock_id = view->lock_id;
-	if (mail_index_lock_exclusive(index, &view->lock_id) < 0)
-		return -1;
-	mail_index_unlock(index, old_lock_id);
+	view = mail_index_view_open_with_map(index, map);
+	if (mail_transaction_log_view_set(view->log_view,
+					  map->hdr.log_file_seq,
+					  map->hdr.log_file_index_int_offset,
+					  (uint32_t)-1, (uoff_t)-1,
+					  MAIL_TRANSACTION_TYPE_MASK) <= 0) {
+		/* can't use it. sync by re-reading index. */
+		mail_index_view_close(&view);
+		return 0;
+	}
 
-	/* NOTE: locking may change index->map so make sure the assignment is
-	   after locking */
-	map = index->map;
-	if (MAIL_INDEX_MAP_IS_IN_MEMORY(map))
-		map->write_to_disk = TRUE;
+	mail_transaction_log_get_mailbox_sync_pos(index->log, &mailbox_sync_seq,
+						  &mailbox_sync_offset);
+
+	/* view referenced the map. avoid unnecessary map cloning by
+	   unreferencing the map while view exists. */
+	map->refcount--;
+
+	had_dirty = (map->hdr.flags & MAIL_INDEX_HDR_FLAG_HAVE_DIRTY) != 0;
+	if (had_dirty) {
+		map->hdr.flags &= ~MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
+		map->write_base_header = TRUE;
+	}
 
 	if (map->hdr_base != map->hdr_copy_buf->data) {
+		/* if syncing updates the header, it updates hdr_copy_buf
+		   and updates hdr_base to hdr_copy_buf. so the buffer must
+		   initially contain a valid header or we'll break it when
+		   writing it. */
 		buffer_reset(map->hdr_copy_buf);
 		buffer_append(map->hdr_copy_buf, map->hdr_base,
 			      map->hdr.header_size);
 		map->hdr_base = map->hdr_copy_buf->data;
 	}
-	i_assert(map->hdr.base_header_size >= sizeof(map->hdr));
 
-	mail_index_unmap(index, &view->map);
-	view->map = map;
-	view->map->refcount++;
-
-	i_assert(view->hdr.messages_count == map->hdr.messages_count);
-
-	had_dirty = (map->hdr.flags & MAIL_INDEX_HDR_FLAG_HAVE_DIRTY) != 0;
-	if (had_dirty)
-		map->hdr.flags &= ~MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
-
-	if (sync_ctx->sync_recent) {
-		/* mark all messages non-recent */
-		mail_index_sync_remove_recent(sync_ctx);
+	if (type != MAIL_INDEX_SYNC_HANDLER_VIEW) {
+		i_assert(index->map == NULL && index->hdr == NULL);
+		index->map = map;
+		index->hdr = &map->hdr;
 	}
 
-	/* make sure we don't go doing fsck while modifying the index */
-	index->sync_update = TRUE;
+	mail_index_sync_map_init(&sync_map_ctx, view, type);
+	map = NULL;
 
-	first_append_uid = 0;
-	check_ext_offsets = TRUE;
+	/* FIXME: when transaction sync lock is removed, we'll need to handle
+	   the case when a transaction is committed while mailbox is being
+	   synced ([synced transactions][new transaction][ext transaction]).
+	   this means int_offset contains [synced] and ext_offset contains
+	   all */
+	expunge_seq = expunge_offset = 0;
 	while ((ret = mail_transaction_log_view_next(view->log_view, &thdr,
-						     &data, &skipped)) > 0) {
-		if ((thdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) {
-			if (sync_only_external) {
-				/* we're syncing only external changes. */
-				continue;
-			}
-		} else if (check_ext_offsets) {
-			if (mail_index_is_ext_synced(view->log_view, view->map))
-				continue;
-			check_ext_offsets = FALSE;
-		}
+						     &tdata, NULL)) > 0) {
+		mail_transaction_log_view_get_prev_pos(view->log_view,
+						       &prev_seq, &prev_offset);
 
-		if ((thdr->type & MAIL_TRANSACTION_APPEND) != 0) {
-			const struct mail_index_record *rec = data;
-
-			if (first_append_uid == 0)
-				first_append_uid = rec->uid;
-
-			map = view->map;
-			count = thdr->size / sizeof(*rec);
-			if ((ret = mail_index_grow(index, map, count)) < 0)
-				break;
-
-			if (map != index->map) {
-				index->map->refcount++;
-				mail_index_sync_replace_map(&sync_map_ctx,
-							    index->map);
-			}
-
-			if (ret == 0) {
-				/* moved to memory. data pointer is invalid,
-				   seek back and do this append again. */
-				log_view_seek_back(view->log_view);
+		if ((thdr->type & MAIL_TRANSACTION_EXTERNAL) != 0) {
+			/* see if this transaction is already synced */
+			if (prev_seq < view->map->hdr.log_file_seq ||
+			    (prev_seq == view->map->hdr.log_file_seq &&
+			     prev_offset <
+			     view->map->hdr.log_file_index_ext_offset))
+				continue;
+		} else if ((thdr->type & MAIL_TRANSACTION_EXPUNGE) != 0) {
+			/* if the message hasn't yet been expunged from the
+			   mailbox, skip this expunge */
+			if (prev_seq > mailbox_sync_seq ||
+			    (prev_seq == mailbox_sync_seq &&
+			     prev_offset >= mailbox_sync_offset)) {
+				if (expunge_seq == 0) {
+					expunge_seq = prev_seq;
+					expunge_offset = prev_offset;
+				}
 				continue;
 			}
 		}
 
-		if (mail_index_sync_record(&sync_map_ctx, thdr, data) < 0) {
-			ret = -1;
-			break;
-		}
+		/* we'll just skip over broken entries */
+		(void)mail_index_sync_record(&sync_map_ctx, thdr, tdata);
 	}
+	map = view->map;
+
+	if (view->broken_counters)
+		mail_index_view_recalc_counters(view);
 
-	if (ret == 0) {
-		mail_index_sync_update_log_offset(&sync_map_ctx, view->map,
-						  TRUE);
-	}
-	mail_index_sync_map_deinit(&sync_map_ctx);
+	if (had_dirty)
+		mail_index_sync_update_hdr_dirty_flag(map);
 
-	index->sync_update = FALSE;
-
-	if (ret < 0) {
-		mail_index_view_unlock(view);
-		return -1;
+	/* update sync position */
+	// FIXME: eol=TRUE gives intro errors
+	mail_index_sync_update_log_offset(&sync_map_ctx, map, FALSE);
+	if (expunge_seq != 0) {
+		i_assert(expunge_seq == map->hdr.log_file_seq);
+		map->hdr.log_file_index_int_offset = expunge_offset;
+		map->write_base_header = TRUE;
 	}
 
-	map = view->map;
-	i_assert(map->records_count == map->hdr.messages_count);
-	i_assert(map->hdr_copy_buf->used <= map->hdr.header_size);
-
-	if (first_append_uid != 0)
-		mail_index_update_day_headers(&map->hdr, first_append_uid);
+	/* although mailbox_sync_update gets updated by the header update
+	   records, transaction log syncing can internally also update
+	   mailbox_sync_max_offset to skip over following external
+	   transactions. use it to avoid extra unneeded log reading. */
+	map->hdr.log_file_mailbox_offset =
+		index->log->head->mailbox_sync_max_offset;
 
-	if ((map->hdr.flags & MAIL_INDEX_HDR_FLAG_HAVE_DIRTY) == 0 &&
-	    had_dirty) {
-		/* do we have dirty flags anymore? */
-		const struct mail_index_record *rec;
+	/*FIXME: if (first_append_uid != 0)
+		mail_index_update_day_headers(&map->hdr, first_append_uid);*/
 
-		for (i = 0; i < map->records_count; i++) {
-			rec = MAIL_INDEX_MAP_IDX(map, i);
-			if ((rec->flags & MAIL_INDEX_MAIL_FLAG_DIRTY) != 0) {
-				map->hdr.flags |=
-					MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
-				break;
-			}
-		}
+	if (map->write_base_header) {
+		i_assert(MAIL_INDEX_MAP_IS_IN_MEMORY(map));
+		buffer_write(map->hdr_copy_buf, 0, &map->hdr, sizeof(map->hdr));
 	}
 
-	if (mail_index_map_msync(index, map) < 0)
-		ret = -1;
-	i_assert(view->map == index->map);
-	view->hdr = map->hdr;
+	/*FIXME:if (mail_index_map_msync(index, map) < 0)
+		ret = -1;*/
+
+	/* restore refcount before closing the view. this is necessary also
+	   if map got cloned, because view closing would otherwise destroy it */
+	map->refcount++;
+	mail_index_view_close(&view);
+
+	mail_index_sync_map_deinit(&sync_map_ctx);
 
-        mail_index_view_unlock(view);
-	return ret;
+	if (type != MAIL_INDEX_SYNC_HANDLER_VIEW) {
+		i_assert(index->map == map);
+		index->map = NULL;
+		index->hdr = NULL;
+	}
+
+	*_map = map;
+	return ret < 0 ? -1 : 1;
 }
--- a/src/lib-index/mail-index-sync.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-sync.c	Mon Jun 11 14:50:10 2007 +0300
@@ -2,7 +2,7 @@
 
 #include "lib.h"
 #include "array.h"
-#include "buffer.h"
+#include "write-full.h"
 #include "mail-index-view-private.h"
 #include "mail-index-sync-private.h"
 #include "mail-index-transaction-private.h"
@@ -10,8 +10,30 @@
 #include "mail-transaction-util.h"
 #include "mail-cache.h"
 
+#include <stdio.h>
 #include <stdlib.h>
 
+struct mail_index_sync_ctx {
+	struct mail_index *index;
+	struct mail_index_view *view;
+	struct mail_index_transaction *sync_trans, *ext_trans;
+	/* transaction record most recently read from view->log_view */
+	const struct mail_transaction_header *hdr;
+	const void *data;
+	/* last_mailbox_*: set by mail_index_sync_update_mailbox_pos() */
+	ARRAY_DEFINE(sync_list, struct mail_index_sync_list);
+	uint32_t next_uid;
+	uint32_t last_mailbox_seq, last_mailbox_offset;
+	/* NOTE(review): last_mailbox_offset is assigned from uoff_t - confirm */
+	uint32_t append_uid_first, append_uid_last;
+
+	unsigned int lock_id;
+	/* sync_dirty: show dirty flags as flag updates when reading the log */
+	unsigned int sync_appends:1;
+	unsigned int sync_recent:1;
+	unsigned int sync_dirty:1;
+};
+
 static void mail_index_sync_add_expunge(struct mail_index_sync_ctx *ctx)
 {
 	const struct mail_transaction_expunge *e = ctx->data;
@@ -108,7 +130,7 @@
 	ctx->sync_appends = TRUE;
 }
 
-static void mail_index_sync_add_transaction(struct mail_index_sync_ctx *ctx)
+static bool mail_index_sync_add_transaction(struct mail_index_sync_ctx *ctx)
 {
 	switch (ctx->hdr->type & MAIL_TRANSACTION_TYPE_MASK) {
 	case MAIL_TRANSACTION_EXPUNGE:
@@ -126,7 +148,10 @@
 	case MAIL_TRANSACTION_APPEND:
 		mail_index_sync_add_append(ctx);
 		break;
+	default:
+		return FALSE;
 	}
+	return TRUE;
 }
 
 static int mail_index_sync_add_dirty_updates(struct mail_index_sync_ctx *ctx)
@@ -178,9 +203,21 @@
 	return 0;
 }
 
+/* save prev log position, with offset advanced by header + hdr->size */
+static void
+mail_index_sync_update_mailbox_pos(struct mail_index_sync_ctx *ctx)
+{
+	uoff_t trans_offset;
+
+	mail_transaction_log_view_get_prev_pos(ctx->view->log_view,
+					       &ctx->last_mailbox_seq,
+					       &trans_offset);
+	ctx->last_mailbox_offset =
+		trans_offset + ctx->hdr->size + sizeof(*ctx->hdr);
+}
+
 static int
-mail_index_sync_read_and_sort(struct mail_index_sync_ctx *ctx,
-			      bool *seen_external_r)
+mail_index_sync_read_and_sort(struct mail_index_sync_ctx *ctx)
 {
 	struct mail_index_transaction *sync_trans = ctx->sync_trans;
 	struct mail_index_sync_list *synclist;
@@ -188,8 +225,6 @@
 	unsigned int i, keyword_count;
 	int ret;
 
-	*seen_external_r = FALSE;
-
 	if ((ctx->view->map->hdr.flags & MAIL_INDEX_HDR_FLAG_HAVE_DIRTY) &&
 	    ctx->sync_dirty) {
 		/* show dirty flags as flag updates */
@@ -202,14 +237,17 @@
 			return -1;
 	}
 
-	/* read all transactions from log into a transaction in memory */
+	/* read all transactions from log into a transaction in memory.
+	   skip the external ones, they're already synced to mailbox and
+	   included in our view */
 	while ((ret = mail_transaction_log_view_next(ctx->view->log_view,
 						     &ctx->hdr,
 						     &ctx->data, NULL)) > 0) {
 		if ((ctx->hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0)
-			*seen_external_r = TRUE;
-		 else
-			mail_index_sync_add_transaction(ctx);
+			continue;
+
+		if (mail_index_sync_add_transaction(ctx))
+			mail_index_sync_update_mailbox_pos(ctx);
 	}
 
 	/* create an array containing all expunge, flag and keyword update
@@ -254,21 +292,22 @@
 	return ret;
 }
 
-static int mail_index_need_lock(struct mail_index *index, bool sync_recent,
-				uint32_t log_file_seq, uoff_t log_file_offset)
+static bool
+mail_index_need_sync(struct mail_index *index,
+		     const struct mail_index_header *hdr, bool sync_recent,
+		     uint32_t log_file_seq, uoff_t log_file_offset)
 {
-	if (sync_recent && index->hdr->recent_messages_count > 0)
-		return 1;
+	// FIXME: how's this recent syncing supposed to work?
+	if (sync_recent && hdr->recent_messages_count > 0)
+		return TRUE;
 
-	if (index->hdr->log_file_seq > log_file_seq ||
-	     (index->hdr->log_file_seq == log_file_seq &&
-	      index->hdr->log_file_int_offset >= log_file_offset &&
-	      index->hdr->log_file_ext_offset >= log_file_offset)) {
-		/* already synced */
-		return mail_cache_need_compress(index->cache);
-	}
+	if (hdr->log_file_seq < log_file_seq ||
+	     (hdr->log_file_seq == log_file_seq &&
+	      hdr->log_file_mailbox_offset < log_file_offset))
+		return TRUE;
 
-	return 1;
+	/* already synced */
+	return mail_cache_need_compress(index->cache);
 }
 
 static int
@@ -297,41 +336,6 @@
 	return 0;
 }
 
-static int mail_index_sync_commit_external(struct mail_index_sync_ctx *ctx)
-{
-	int ret;
-
-	/* find the first external transaction, if there are any */
-	while ((ret = mail_transaction_log_view_next(ctx->view->log_view,
-						     &ctx->hdr, &ctx->data,
-						     NULL)) > 0) {
-		if ((ctx->hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0)
-			break;
-	}
-	if (ret < 0)
-		return -1;
-
-	if (ret > 0) {
-		uint32_t seq;
-		uoff_t offset;
-
-		/* found it. update log view's range to begin from it and
-		   write all external transactions to index. */
-		mail_transaction_log_view_get_prev_pos(ctx->view->log_view,
-						       &seq, &offset);
-		if (mail_index_sync_set_log_view(ctx->view, seq, offset) < 0)
-			return -1;
-		if (mail_index_sync_update_index(ctx, TRUE) < 0)
-			return -1;
-	}
-	return 0;
-}
-
-#define MAIL_INDEX_IS_SYNCS_SAME(index) \
-	((index)->hdr != NULL && \
-	 (index)->sync_log_file_seq == (index)->hdr->log_file_seq && \
-	 (index)->sync_log_file_offset == (index)->hdr->log_file_ext_offset)
-
 int mail_index_sync_begin(struct mail_index *index,
                           struct mail_index_sync_ctx **ctx_r,
 			  struct mail_index_view **view_r,
@@ -339,57 +343,43 @@
 			  uint32_t log_file_seq, uoff_t log_file_offset,
 			  bool sync_recent, bool sync_dirty)
 {
+	const struct mail_index_header *hdr;
 	struct mail_index_sync_ctx *ctx;
 	struct mail_index_view *sync_view;
 	uint32_t seq;
 	uoff_t offset;
 	unsigned int lock_id = 0;
-	bool seen_external;
 
 	if (mail_transaction_log_sync_lock(index->log, &seq, &offset) < 0)
 		return -1;
 
-	if (!index->mmap_disable || !MAIL_INDEX_IS_SYNCS_SAME(index) ||
-	    index->sync_log_file_seq != seq ||
-	    index->sync_log_file_offset != offset) {
-		/* make sure we have the latest file mapped */
-		if (mail_index_lock_shared(index, TRUE, &lock_id) < 0) {
-			mail_transaction_log_sync_unlock(index->log);
-			return -1;
-		}
+	/* The view must contain what we expect the mailbox to look like
+	   currently. That allows the backend to update external flag
+	   changes (etc.) if the view doesn't match the mailbox.
 
-		/* with mmap_disable the force parameter has somewhat special
-		   meaning, it syncs exactly to the log seq/offset in index
-		   file's header. */
-		if (mail_index_map(index, index->mmap_disable) <= 0) {
-			mail_transaction_log_sync_unlock(index->log);
-			mail_index_unlock(index, lock_id);
-			return -1;
-		}
+	   We'll update the view to contain everything that exists in the
+	   transaction log except for expunges. They're synced in
+	   mail_index_sync_commit(). */
+	if (mail_index_map(index, MAIL_INDEX_SYNC_HANDLER_HEAD,
+			   &lock_id) <= 0) {
+		// FIXME: handle ret=0 specially?
+		mail_transaction_log_sync_unlock(index->log);
+		return -1;
 	}
+	hdr = index->hdr;
 
-	if ((index->hdr->flags & MAIL_INDEX_HDR_FLAG_FSCK) != 0) {
-		if (mail_index_fsck(index) <= 0) {
-			mail_index_unlock(index, lock_id);
-			mail_transaction_log_sync_unlock(index->log);
-			return -1;
-		}
-	}
-
-	if (!mail_index_need_lock(index, sync_recent,
+	if (!mail_index_need_sync(index, hdr, sync_recent,
 				  log_file_seq, log_file_offset)) {
 		mail_index_unlock(index, lock_id);
 		mail_transaction_log_sync_unlock(index->log);
 		return 0;
 	}
 
-	if (index->hdr->log_file_int_offset > index->hdr->log_file_ext_offset ||
-	    (index->hdr->log_file_seq == seq &&
-	     index->hdr->log_file_ext_offset > offset) ||
-	    (index->hdr->log_file_seq != seq &&
-	     !mail_transaction_log_is_head_prev(index->log,
-	     				index->hdr->log_file_seq,
-					index->hdr->log_file_ext_offset))) {
+	if (hdr->log_file_index_int_offset > hdr->log_file_index_ext_offset ||
+	    hdr->log_file_seq > seq ||
+	    (hdr->log_file_seq == seq &&
+	     (hdr->log_file_index_ext_offset > offset ||
+	      hdr->log_file_mailbox_offset > offset))) {
 		/* broken sync positions. fix them. */
 		mail_index_set_error(index,
 			"broken sync positions in index file %s",
@@ -406,6 +396,8 @@
 	ctx->lock_id = lock_id;
 	ctx->sync_recent = sync_recent;
 	ctx->sync_dirty = sync_dirty;
+	ctx->last_mailbox_seq = hdr->log_file_seq;
+	ctx->last_mailbox_offset = hdr->log_file_mailbox_offset;
 
 	ctx->view = mail_index_view_open(index);
 
@@ -413,43 +405,17 @@
 	ctx->sync_trans = mail_index_transaction_begin(sync_view, FALSE, TRUE);
 	mail_index_view_close(&sync_view);
 
-	if (mail_index_sync_set_log_view(ctx->view,
-					 index->hdr->log_file_seq,
-					 index->hdr->log_file_int_offset) < 0) {
+	/* we wish to see all the changes from last mailbox sync position to
+	   the end of the transaction log */
+	if (mail_index_sync_set_log_view(ctx->view, hdr->log_file_seq,
+					 hdr->log_file_mailbox_offset) < 0) {
                 mail_index_sync_rollback(&ctx);
 		return -1;
 	}
 
-	/* See if there are some external transactions which were
-	   written to transaction log, but weren't yet committed to
-	   index. commit them first to avoid conflicts with another
-	   external sync.
-
-	   This is mostly needed to make sure there won't be multiple
-	   appends with same UIDs, because those would cause
-	   transaction log to be marked corrupted.
-
-	   Note that any internal transactions must not be committed
-	   yet. They need to be synced with the real mailbox first. */
-	if (seq != index->hdr->log_file_seq ||
-	    offset != index->hdr->log_file_ext_offset) {
-		if (mail_index_sync_commit_external(ctx) < 0) {
-			mail_index_sync_rollback(&ctx);
-			return -1;
-		}
-
-		mail_index_view_close(&ctx->view);
-		ctx->view = mail_index_view_open(index);
-
-		if (mail_index_sync_set_log_view(ctx->view,
-					index->hdr->log_file_seq,
-					index->hdr->log_file_int_offset) < 0)
-			return -1;
-	}
-
 	/* we need to have all the transactions sorted to optimize
 	   caller's mailbox access patterns */
-	if (mail_index_sync_read_and_sort(ctx, &seen_external) < 0) {
+	if (mail_index_sync_read_and_sort(ctx) < 0) {
                 mail_index_sync_rollback(&ctx);
 		return -1;
 	}
@@ -632,10 +598,8 @@
         struct mail_index_sync_ctx *ctx = *_ctx;
 
 	*_ctx = NULL;
+
 	mail_index_unlock(ctx->index, ctx->lock_id);
-
-	i_assert(ctx->index->map == NULL ||
-		 !ctx->index->map->write_to_disk);
 	mail_transaction_log_sync_unlock(ctx->index->log);
 
 	mail_index_view_close(&ctx->view);
@@ -645,15 +609,188 @@
 	i_free(ctx);
 }
 
+static int mail_index_recreate(struct mail_index *index)
+{
+	struct mail_index_map *map = index->map;
+	unsigned int base_size;
+	const char *path;
+	int ret, fd;
+
+	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
+
+	fd = mail_index_create_tmp_file(index, &path);
+	if (fd == -1)
+		return -1;
+
+	/* write base header, at most sizeof(map->hdr) bytes of it */
+	base_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
+	ret = write_full(fd, &map->hdr, base_size);
+	if (ret == 0) {
+		/* write the rest of the header (extended headers) */
+		ret = write_full(fd, CONST_PTR_OFFSET(map->hdr_base, base_size),
+				 map->hdr.header_size - base_size);
+	}
+	if (ret == 0) {
+		ret = write_full(fd, map->records, map->records_count *
+				 map->hdr.record_size);
+	}
+	if (ret < 0)
+		mail_index_file_set_syscall_error(index, path, "write_full()");
+
+	if (ret == 0 && !index->fsync_disable && fsync(fd) < 0) {
+		mail_index_file_set_syscall_error(index, path, "fsync()");
+		ret = -1;
+	}
+
+	if (close(fd) < 0) {
+		mail_index_file_set_syscall_error(index, path, "close()");
+		ret = -1;
+	}
+
+	if (ret == 0 && rename(path, index->filepath) < 0) {
+		mail_index_set_error(index, "rename(%s, %s) failed: %m",
+				     path, index->filepath);
+		ret = -1;
+	}
+
+	if (ret < 0) {
+		if (unlink(path) < 0) {
+			mail_index_set_error(index, "unlink(%s) failed: %m",
+					     path);
+		}
+	}
+	return ret;
+}
+
+static int mail_index_write_map_over(struct mail_index *index)
+{
+	struct mail_index_map *map = index->map;
+	unsigned int base_size;
+
+	if (MAIL_INDEX_IS_IN_MEMORY(index))
+		return 0;
+
+	/* write records write_seq_first..write_seq_last, if they're set */
+	if (map->write_seq_first != 0) {
+		size_t rec_offset =
+			(map->write_seq_first-1) * map->hdr.record_size;
+
+		if (pwrite_full(index->fd,
+				CONST_PTR_OFFSET(map->records, rec_offset),
+				(map->write_seq_last -
+				 map->write_seq_first + 1) *
+				map->hdr.record_size,
+				map->hdr.header_size + rec_offset) < 0)
+			return -1;
+	}
+
+	/* write base header, at most sizeof(map->hdr) bytes of it */
+	if (map->write_base_header) {
+		base_size = I_MIN(map->hdr.base_header_size, sizeof(map->hdr));
+		if (pwrite_full(index->fd, &map->hdr, base_size, 0) < 0)
+			return -1;
+	}
+
+	/* write the part of the header that follows the base header */
+	if (map->write_ext_header) {
+		base_size = map->hdr.base_header_size;
+		if (pwrite_full(index->fd,
+				CONST_PTR_OFFSET(map->hdr_base, base_size),
+				map->hdr.header_size - base_size,
+				base_size) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+#define mail_index_map_has_changed(map) \
+	((map)->write_base_header || (map)->write_ext_header || \
+	 (map)->write_seq_first != 0)
+
+static void mail_index_write(struct mail_index *index, bool want_rotate)
+{
+	struct mail_index_map *map = index->map;
+	const struct mail_index_header *hdr = &map->hdr;
+	unsigned int lock_id;
+
+	if (!mail_index_map_has_changed(map))
+		return;
+
+	if (hdr->base_header_size < sizeof(*hdr)) {
+		/* header size grew. we can't update this file in place. */
+		map->write_atomic = TRUE;
+	}
+	if (index->fd == -1) {
+		/* index file doesn't exist, it's corrupted or we haven't
+		   opened it for some reason */
+		map->write_atomic = TRUE;
+	}
+	if (!map->write_atomic) {
+		if (mail_index_try_lock_exclusive(index, &lock_id) <= 0) {
+			/* locking failed, recreate the whole file instead */
+			map->write_atomic = TRUE;
+		}
+	}
+
+	if (map->write_atomic) {
+		if (!MAIL_INDEX_IS_IN_MEMORY(index)) {
+			if (mail_index_recreate(index) < 0) {
+				mail_index_move_to_memory(index);
+				return;
+			}
+		}
+	} else {
+		if (mail_index_write_map_over(index) < 0) {
+			mail_index_set_error(index,
+				"pwrite_full(%s) failed: %m", index->filepath);
+			mail_index_set_inconsistent(index);
+		}
+		mail_index_unlock(index, lock_id);
+	}
+
+	index->last_read_log_file_index_offset = hdr->log_file_index_int_offset;
+
+	map->write_atomic = FALSE;
+	map->write_seq_first = map->write_seq_last = 0;
+	map->write_base_header = FALSE;
+	map->write_ext_header = FALSE;
+
+	if (want_rotate &&
+	    hdr->log_file_index_int_offset == hdr->log_file_index_ext_offset &&
+	    hdr->log_file_index_int_offset == hdr->log_file_mailbox_offset)
+		(void)mail_transaction_log_rotate(index->log);
+}
+
+static void
+mail_index_sync_update_mailbox_offset(struct mail_index_sync_ctx *ctx)
+{
+	const struct mail_index_header *hdr = &ctx->index->map->hdr;
+	uint32_t seq;
+	uoff_t offset;
+
+	mail_transaction_log_view_get_prev_pos(ctx->view->log_view,
+					       &seq, &offset);
+	mail_transaction_log_set_mailbox_sync_pos(ctx->index->log, seq, offset);
+
+	/* This sync may have seen only external transactions, in which case
+	   it's not required to write the mailbox sync offset. Otherwise we
+	   must update the offset even if nothing else is going to be
+	   written. */
+	if (hdr->log_file_mailbox_offset != ctx->last_mailbox_offset)
+		ctx->ext_trans->log_updates = TRUE;
+}
+
 int mail_index_sync_commit(struct mail_index_sync_ctx **_ctx)
 {
         struct mail_index_sync_ctx *ctx = *_ctx;
 	struct mail_index *index = ctx->index;
-	const struct mail_index_header *hdr;
-	uint32_t seq;
+	unsigned int lock_id;
+	uint32_t seq, diff;
 	uoff_t offset;
+	bool want_rotate;
 	int ret = 0;
 
+	mail_index_sync_update_mailbox_offset(ctx);
 	if (mail_cache_need_compress(index->cache)) {
 		/* if cache compression fails, we don't really care.
 		   the cache offsets are updated only if the compression was
@@ -661,37 +798,27 @@
 		(void)mail_cache_compress(index->cache, ctx->ext_trans);
 	}
 
-	if (mail_index_transaction_commit(&ctx->ext_trans, &seq, &offset) < 0)
-		ret = -1;
-
-	if (mail_transaction_log_view_is_corrupted(ctx->view->log_view))
-		ret = -1;
-
-	/* we have had the transaction log locked since the beginning of sync,
-	   so only external changes could have been committed. write them to
-	   the index here as well. */
-	mail_transaction_log_get_head(index->log, &seq, &offset);
-
-	hdr = index->hdr;
-	if (ret == 0 && (hdr->log_file_seq != seq ||
-			 hdr->log_file_int_offset != offset ||
-			 hdr->log_file_ext_offset != offset ||
-			 ctx->sync_recent)) {
-		/* write all pending changes to index. */
-		if (mail_index_sync_set_log_view(ctx->view,
-						 hdr->log_file_seq,
-						 hdr->log_file_int_offset) < 0)
-			ret = -1;
-		else if (mail_index_sync_update_index(ctx, FALSE) < 0)
-			ret = -1;
+	if (mail_index_transaction_commit(&ctx->ext_trans, &seq, &offset) < 0) {
+		mail_index_sync_end(&ctx);
+		return -1;
 	}
 
-	if (ret == 0) {
-		index->sync_log_file_seq = index->map->hdr.log_file_seq;
-		index->sync_log_file_offset =
-			index->map->hdr.log_file_int_offset;
+	/* refresh the mapping with newly committed external transactions
+	   and the synced expunges. sync using file handler here so that the
+	   expunge handlers get called. */
+	if (mail_index_map(ctx->index, MAIL_INDEX_SYNC_HANDLER_FILE,
+			   &lock_id) <= 0) {
+		// FIXME: handle ret=0 specially?
+		// FIXME: do we really need to return failure?
+		ret = -1;
 	}
 
+	/* FIXME: create a better rule? */
+	want_rotate = mail_transaction_log_want_rotate(index->log);
+	diff = index->map->hdr.log_file_index_int_offset -
+		index->last_read_log_file_index_offset;
+	if (ret == 0 && (diff > 1024 || want_rotate))
+		mail_index_write(index, want_rotate);
 	mail_index_sync_end(_ctx);
 	return ret;
 }
--- a/src/lib-index/mail-index-view-private.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-view-private.h	Mon Jun 11 14:50:10 2007 +0300
@@ -75,11 +75,14 @@
 	unsigned int broken_counters:1;
 };
 
+struct mail_index_view *
+mail_index_view_open_with_map(struct mail_index *index,
+			      struct mail_index_map *map);
 void mail_index_view_clone(struct mail_index_view *dest,
 			   const struct mail_index_view *src);
 void mail_index_view_ref(struct mail_index_view *view);
 int mail_index_view_lock(struct mail_index_view *view);
-int mail_index_view_lock_head(struct mail_index_view *view, bool update_index);
+int mail_index_view_lock_head(struct mail_index_view *view);
 void mail_index_view_unref_maps(struct mail_index_view *view);
 void mail_index_view_add_hidden_transaction(struct mail_index_view *view,
 					    uint32_t log_file_seq,
--- a/src/lib-index/mail-index-view-sync.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-view-sync.c	Mon Jun 11 14:50:10 2007 +0300
@@ -23,7 +23,7 @@
 	unsigned int sync_map_update:1;
 };
 
-static void
+static int
 mail_transaction_log_sort_expunges(ARRAY_TYPE(seq_range) *expunges,
 				   const struct seq_range *src, size_t src_size)
 {
@@ -38,14 +38,15 @@
 	dest = array_get_modifiable(expunges, &dest_count);
 	if (dest_count == 0) {
 		array_append(expunges, src, src_size / sizeof(*src));
-		return;
+		return 0;
 	}
 
 	src_end = CONST_PTR_OFFSET(src, src_size);
 	for (i = 0; src != src_end; src++) {
 		/* src[] must be sorted. */
-		i_assert(src+1 == src_end || src->seq2 < src[1].seq1);
-		i_assert(src->seq1 <= src->seq2);
+		if (src->seq1 > src->seq2 ||
+		    (src+1 != src_end && src->seq2 >= src[1].seq1))
+			return -1;
 
 		for (; i < dest_count; i++) {
 			if (src->seq1 < dest[i].seq1)
@@ -84,6 +85,7 @@
 			i = first;
 		}
 	}
+	return 0;
 }
 
 static int view_sync_set_log_view_range(struct mail_index_view *view,
@@ -97,7 +99,7 @@
 					    view->log_file_seq,
 					    view->log_file_offset,
 					    hdr->log_file_seq,
-					    hdr->log_file_int_offset,
+					    hdr->log_file_index_int_offset,
 					    type_mask);
 	if (ret <= 0) {
 		if (ret == 0) {
@@ -132,7 +134,13 @@
 	while ((ret = mail_transaction_log_view_next(view->log_view,
 						     &hdr, &data, NULL)) > 0) {
 		i_assert((hdr->type & MAIL_TRANSACTION_EXPUNGE) != 0);
-		mail_transaction_log_sort_expunges(expunges_r, data, hdr->size);
+		if (mail_transaction_log_sort_expunges(expunges_r, data,
+						       hdr->size) < 0) {
+			mail_transaction_log_view_set_corrupted(view->log_view,
+				"Corrupted expunge record");
+			ret = -1;
+			break;
+		}
 	}
 
 	if (ret < 0) {
@@ -175,23 +183,15 @@
 
 	/* Keep log position so we know where to continue syncing */
 	map->hdr.log_file_seq = view->hdr.log_file_seq;
-	map->hdr.log_file_int_offset = view->hdr.log_file_int_offset;
-	map->hdr.log_file_ext_offset = view->hdr.log_file_ext_offset;
+	map->hdr.log_file_index_int_offset =
+		view->hdr.log_file_index_int_offset;
+	map->hdr.log_file_index_ext_offset =
+		view->hdr.log_file_index_ext_offset;
 
 	view->hdr = map->hdr;
 	buffer_write(map->hdr_copy_buf, 0, &map->hdr, sizeof(map->hdr));
 }
 
-#define MAIL_INDEX_VIEW_VISIBLE_FLAGS_MASK \
-	(MAIL_INDEX_SYNC_TYPE_FLAGS | \
-	 MAIL_INDEX_SYNC_TYPE_KEYWORD_RESET | \
-	 MAIL_INDEX_SYNC_TYPE_KEYWORD_ADD | MAIL_INDEX_SYNC_TYPE_KEYWORD_REMOVE)
-
-#define MAIL_TRANSACTION_VISIBLE_SYNC_MASK \
-	(MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_APPEND | \
-	 MAIL_TRANSACTION_FLAG_UPDATE | MAIL_TRANSACTION_KEYWORD_UPDATE | \
-	 MAIL_TRANSACTION_KEYWORD_RESET)
-
 #ifdef DEBUG
 static void mail_index_view_check(struct mail_index_view *view)
 {
@@ -228,10 +228,20 @@
 }
 #endif
 
+#define MAIL_INDEX_VIEW_VISIBLE_FLAGS_MASK \
+	(MAIL_INDEX_SYNC_TYPE_FLAGS | \
+	 MAIL_INDEX_SYNC_TYPE_KEYWORD_RESET | \
+	 MAIL_INDEX_SYNC_TYPE_KEYWORD_ADD | MAIL_INDEX_SYNC_TYPE_KEYWORD_REMOVE)
+
+#define MAIL_TRANSACTION_VISIBLE_SYNC_MASK \
+	(MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_APPEND | \
+	 MAIL_TRANSACTION_FLAG_UPDATE | MAIL_TRANSACTION_KEYWORD_UPDATE | \
+	 MAIL_TRANSACTION_KEYWORD_RESET)
+
 #define VIEW_IS_SYNCED_TO_SAME(view, hdr) \
 	((hdr)->log_file_seq == (view)->log_file_seq && \
-	 (hdr)->log_file_int_offset == (view)->log_file_offset && \
-	 (hdr)->log_file_ext_offset == (view)->log_file_offset && \
+	 (hdr)->log_file_index_int_offset == (view)->log_file_offset && \
+	 (hdr)->log_file_index_ext_offset == (view)->log_file_offset && \
 	 (!array_is_created(&view->syncs_done) || \
 	  array_count(&view->syncs_done) == 0))
 
@@ -255,7 +265,7 @@
 	i_assert(!view->syncing);
 	i_assert(view->transactions == 0);
 
-	if (mail_index_view_lock_head(view, TRUE) < 0)
+	if (mail_index_view_lock_head(view) < 0)
 		return -1;
 
 	if ((sync_mask & MAIL_INDEX_SYNC_TYPE_EXPUNGE) != 0) {
@@ -341,8 +351,7 @@
 #endif
 		}
 
-		map = mail_index_map_clone(view->map,
-					   view->map->hdr.record_size);
+		map = mail_index_map_clone(view->map);
 		view->map->records_count = old_records_count;
 		mail_index_unmap(view->index, &view->map);
 		view->map = map;
@@ -461,7 +470,7 @@
 		   at the end of view sync we'll update the ext_offset in the
 		   header so that this check always becomes FALSE for
 		   subsequent syncs. */
-		synced_to_map = offset < view->hdr.log_file_ext_offset &&
+		synced_to_map = offset < view->hdr.log_file_index_ext_offset &&
 			seq == view->hdr.log_file_seq &&
 			(ctx->hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0;
 
@@ -651,8 +660,7 @@
 	mail_index_view_sync_clean_log_syncs(ctx, &view->syncs_done, TRUE);
 	mail_index_view_sync_clean_log_syncs(ctx, &view->syncs_hidden, FALSE);
 
-	if (!ctx->last_read && ctx->hdr != NULL &&
-	    ctx->data_offset != ctx->hdr->size) {
+	if (!ctx->last_read) {
 		/* we didn't sync everything */
 		view->inconsistent = TRUE;
 	}
@@ -668,18 +676,25 @@
 			i_assert(view->log_file_seq >
 				 view->map->hdr.log_file_seq);
 			view->map->hdr.log_file_seq = view->log_file_seq;
-			view->map->hdr.log_file_ext_offset =
+			view->map->hdr.log_file_index_ext_offset =
 				view->log_file_offset;
 		} else {
 			i_assert(view->log_file_offset >=
-				 view->map->hdr.log_file_int_offset);
+				 view->map->hdr.log_file_index_int_offset);
 			if (view->log_file_offset >
-			    view->map->hdr.log_file_ext_offset) {
-				view->map->hdr.log_file_ext_offset =
+			    view->map->hdr.log_file_index_ext_offset) {
+				view->map->hdr.log_file_index_ext_offset =
 					view->log_file_offset;
 			}
 		}
-		view->map->hdr.log_file_int_offset = view->log_file_offset;
+		view->map->hdr.log_file_index_int_offset =
+			view->log_file_offset;
+		buffer_write(view->map->hdr_copy_buf, 0,
+			     &view->map->hdr, sizeof(view->map->hdr));
+	} else {
+		i_assert(view->inconsistent ||
+			 view->log_file_offset >=
+			 view->map->hdr.log_file_index_int_offset);
 	}
 	view->hdr = view->map->hdr;
 
--- a/src/lib-index/mail-index-view.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index-view.c	Mon Jun 11 14:50:10 2007 +0300
@@ -70,10 +70,8 @@
 }
 #endif
 
-int mail_index_view_lock_head(struct mail_index_view *view, bool update_index)
+int mail_index_view_lock_head(struct mail_index_view *view)
 {
-	unsigned int lock_id;
-
 #ifdef DEBUG
 	mail_index_view_check_nextuid(view);
 #endif
@@ -83,30 +81,11 @@
 		return 0;
 
 	if (!mail_index_is_locked(view->index, view->lock_id)) {
-		if (mail_index_lock_shared(view->index, update_index,
-					   &view->lock_id) < 0)
-			return -1;
-
-		if (mail_index_map(view->index, FALSE) <= 0) {
+		if (mail_index_lock_shared(view->index, &view->lock_id) < 0) {
 			view->inconsistent = TRUE;
 			return -1;
 		}
-
-		if (view->index->indexid != view->indexid) {
-			/* index was rebuilt */
-			view->inconsistent = TRUE;
-			return -1;
-		}
-	} else if (update_index) {
-		if (mail_index_lock_shared(view->index, TRUE, &lock_id) < 0)
-			return -1;
-
-		mail_index_unlock(view->index, view->lock_id);
-		view->lock_id = lock_id;
 	}
-
-	i_assert(view->index->lock_type != F_UNLCK);
-
 	return 0;
 }
 
@@ -123,7 +102,7 @@
 		return 0;
 	}
 
-	return mail_index_view_lock_head(view, FALSE);
+	return mail_index_view_lock_head(view);
 }
 
 void mail_index_view_unlock(struct mail_index_view *view)
@@ -242,7 +221,7 @@
 	}
 
 	/* look up the record from head mapping. it may contain some changes. */
-	if (mail_index_view_lock_head(view, FALSE) < 0)
+	if (mail_index_view_lock_head(view) < 0)
 		return -1;
 
 	/* start looking up from the same sequence as in the old view.
@@ -467,7 +446,7 @@
 	/* if we have a mapping, the view where it's from is already locked */
 	if (map == NULL) {
 		/* no mapping given, use head mapping */
-		if (mail_index_view_lock_head(view, FALSE) < 0)
+		if (mail_index_view_lock_head(view) < 0)
 			return -1;
 
 		map = view->index->map;
@@ -694,12 +673,12 @@
 	_view_get_header_ext
 };
 
-struct mail_index_view *mail_index_view_open(struct mail_index *index)
+struct mail_index_view *
+mail_index_view_open_with_map(struct mail_index *index,
+			      struct mail_index_map *map)
 {
 	struct mail_index_view *view;
 
-	i_assert(index->map != NULL);
-
 	view = i_new(struct mail_index_view, 1);
 	view->refcount = 1;
 	view->v = view_vfuncs;
@@ -707,21 +686,24 @@
 	view->log_view = mail_transaction_log_view_open(index->log);
 
 	view->indexid = index->indexid;
-	view->map = index->map;
+	view->map = map;
 	view->map->refcount++;
 
 	view->hdr = view->map->hdr;
 
 	view->log_file_seq = view->map->hdr.log_file_seq;
-	view->log_file_offset =
-		I_MIN(view->map->hdr.log_file_int_offset,
-		      view->map->hdr.log_file_ext_offset);
+	view->log_file_offset = view->map->hdr.log_file_index_int_offset;
 
 	i_array_init(&view->module_contexts,
 		     I_MIN(5, mail_index_module_register.id));
 	return view;
 }
 
+struct mail_index_view *mail_index_view_open(struct mail_index *index)
+{
+	return mail_index_view_open_with_map(index, index->map);
+}
+
 const struct mail_index_ext *
 mail_index_view_get_ext(struct mail_index_view *view, uint32_t ext_id)
 {
--- a/src/lib-index/mail-index.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index.c	Mon Jun 11 14:50:10 2007 +0300
@@ -47,6 +47,7 @@
 	index->keywords_hash =
 		hash_create(default_pool, index->keywords_pool, 0,
 			    strcase_hash, (hash_cmp_callback_t *)strcasecmp);
+	index->log = mail_transaction_log_alloc(index);
 	return index;
 }
 
@@ -57,6 +58,7 @@
 	*_index = NULL;
 	mail_index_close(index);
 
+	mail_transaction_log_free(&index->log);
 	hash_destroy(index->keywords_hash);
 	pool_unref(index->extension_pool);
 	pool_unref(index->keywords_pool);
@@ -321,99 +323,12 @@
 	return &index->keywords;
 }
 
-bool mail_index_is_ext_synced(struct mail_transaction_log_view *log_view,
-			      struct mail_index_map *map)
-{
-	uint32_t prev_seq;
-	uoff_t prev_offset;
-
-	mail_transaction_log_view_get_prev_pos(log_view, &prev_seq,
-					       &prev_offset);
-	return prev_seq < map->hdr.log_file_seq ||
-		(prev_seq == map->hdr.log_file_seq &&
-		 prev_offset < map->hdr.log_file_ext_offset);
-}
-
-int mail_index_read_header(struct mail_index *index,
-			   void *buf, size_t buf_size, size_t *pos_r)
-{
-	size_t pos;
-	int ret;
-
-	memset(buf, 0, sizeof(struct mail_index_header));
-
-        /* try to read the whole header, but it's not necessarily an error to
-	   read less since the older versions of the index format could be
-	   smaller. Request reading up to buf_size, but accept if we only got
-	   the header. */
-        pos = 0;
-	do {
-		ret = pread(index->fd, PTR_OFFSET(buf, pos),
-			    buf_size - pos, pos);
-		if (ret > 0)
-			pos += ret;
-	} while (ret > 0 && pos < sizeof(struct mail_index_header));
-
-	*pos_r = pos;
-	return ret;
-}
-
-int mail_index_get_latest_header(struct mail_index *index,
-				 struct mail_index_header *hdr_r)
-{
-	size_t pos;
-	unsigned int i;
-	int ret;
-
-	if (MAIL_INDEX_IS_IN_MEMORY(index)) {
-		*hdr_r = *index->hdr;
-		return TRUE;
-	}
-
-	if (!index->mmap_disable) {
-		ret = mail_index_map(index, FALSE);
-		if (ret > 0)
-			*hdr_r = *index->hdr;
-		else
-			memset(hdr_r, 0, sizeof(*hdr_r));
-		return ret;
-	}
-
-	for (i = 0;; i++) {
-		ret = mail_index_read_header(index, hdr_r, sizeof(*hdr_r),
-					     &pos);
-		if (ret <= 0 || errno != ESTALE ||
-		    i == MAIL_INDEX_ESTALE_RETRY_COUNT)
-			break;
-
-		/* ESTALE - reopen index file */
-                if (close(index->fd) < 0)
-			mail_index_set_syscall_error(index, "close()");
-		index->fd = -1;
-
-		ret = mail_index_try_open_only(index);
-		if (ret <= 0) {
-			if (ret == 0) {
-				/* the file was lost */
-				errno = ENOENT;
-				mail_index_set_syscall_error(index, "open()");
-			}
-			return -1;
-		}
-	}
-
-	if (ret < 0)
-		mail_index_set_syscall_error(index, "pread_full()");
-	return ret;
-}
-
 int mail_index_try_open_only(struct mail_index *index)
 {
+	i_assert(index->fd == -1);
 	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
 
-        /* Note that our caller must close index->fd by itself.
-           mail_index_reopen() for example wants to revert back to old
-           index file if opening the new one fails. */
+        /* Note that our caller must close index->fd by itself. */
 	index->fd = nfs_safe_open(index->filepath, O_RDWR);
 	index->readonly = FALSE;
 
@@ -433,44 +348,24 @@
 }
 
 static int
-mail_index_try_open(struct mail_index *index, unsigned int *lock_id_r)
+mail_index_try_open(struct mail_index *index)
 {
 	unsigned int lock_id;
 	int ret;
 
         i_assert(index->fd == -1);
-	i_assert(index->lock_type == F_UNLCK);
-
-	if (lock_id_r != NULL)
-		*lock_id_r = 0;
 
 	if (MAIL_INDEX_IS_IN_MEMORY(index))
 		return 0;
 
-	ret = mail_index_try_open_only(index);
-	if (ret <= 0)
-		return ret;
+	ret = mail_index_map(index, MAIL_INDEX_SYNC_HANDLER_HEAD, &lock_id);
+	mail_index_unlock(index, lock_id);
 
-	if (mail_index_lock_shared(index, FALSE, &lock_id) < 0) {
-		(void)close(index->fd);
-		index->fd = -1;
-		return -1;
-	}
-	ret = mail_index_map(index, FALSE);
 	if (ret == 0) {
 		/* it's corrupted - recreate it */
-		mail_index_unlock(index, lock_id);
-		if (lock_id_r != NULL)
-			*lock_id_r = 0;
-
-		i_assert(index->file_lock == NULL);
-		(void)close(index->fd);
+		if (close(index->fd) < 0)
+			mail_index_set_syscall_error(index, "close()");
 		index->fd = -1;
-	} else {
-		if (lock_id_r != NULL)
-			*lock_id_r = lock_id;
-		else
-			mail_index_unlock(index, lock_id);
 	}
 	return ret;
 }
@@ -480,26 +375,19 @@
 {
 	size_t hdr_size;
 
+	// FIXME: this whole function should go away
+	if (index->fd == -1)
+		return -1;
+
 	hdr_size = I_MIN(sizeof(*hdr), hdr->base_header_size);
 
-	if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
-		memcpy(index->map->mmap_base, hdr, hdr_size);
-		if (msync(index->map->mmap_base, hdr_size, MS_SYNC) < 0)
-			return mail_index_set_syscall_error(index, "msync()");
-		index->map->hdr = *hdr;
-	} else {
-		if (!MAIL_INDEX_IS_IN_MEMORY(index)) {
-			if (pwrite_full(index->fd, hdr, hdr_size, 0) < 0) {
-				mail_index_set_syscall_error(index,
-							     "pwrite_full()");
-				return -1;
-			}
-		}
-
-		index->map->hdr = *hdr;
-		buffer_write(index->map->hdr_copy_buf, 0, hdr, hdr_size);
+	if (pwrite_full(index->fd, hdr, hdr_size, 0) < 0) {
+		mail_index_set_syscall_error(index, "pwrite_full()");
+		return -1;
 	}
 
+	index->map->hdr = *hdr;
+	buffer_write(index->map->hdr_copy_buf, 0, hdr, hdr_size);
 	return 0;
 }
 
@@ -526,192 +414,46 @@
 	return fd;
 }
 
-static int mail_index_create(struct mail_index *index,
-			     struct mail_index_header *hdr)
+static bool mail_index_open_files(struct mail_index *index,
+				  enum mail_index_open_flags flags)
 {
-	const char *path;
-	uint32_t seq;
-	uoff_t offset;
 	int ret;
-
-	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
-	i_assert(index->lock_type == F_UNLCK);
-
-	/* log file lock protects index creation */
-	if (mail_transaction_log_sync_lock(index->log, &seq, &offset) < 0)
-		return -1;
-
-	ret = mail_index_try_open(index, NULL);
-	if (ret != 0) {
-		mail_transaction_log_sync_unlock(index->log);
-		return ret < 0 ? -1 : 0;
-	}
-
-	/* mark the existing log file as synced */
-	hdr->log_file_seq = seq;
-	hdr->log_file_int_offset = offset;
-	hdr->log_file_ext_offset = offset;
+	bool created = FALSE;
 
-	/* create it fully in index.tmp first */
-	index->fd = mail_index_create_tmp_file(index, &path);
-	if (index->fd == -1)
-		ret = -1;
-	else if (write_full(index->fd, hdr, sizeof(*hdr)) < 0) {
-		mail_index_file_set_syscall_error(index, path, "write_full()");
-		ret = -1;
-	} else {
-		index->lock_type = F_WRLCK;
-		ret = mail_index_map(index, FALSE);
-		index->lock_type = F_UNLCK;
-	}
-
+	ret = mail_transaction_log_open(index->log);
 	if (ret == 0) {
-		/* it's corrupted even while we just created it,
-		   should never happen unless someone pokes the file directly */
-		mail_index_set_error(index,
-			"Newly created index file is corrupted: %s", path);
-		ret = -1;
-	}
+		if ((flags & MAIL_INDEX_OPEN_FLAG_CREATE) == 0)
+			return FALSE;
 
-	if (ret < 0) {
-		if (unlink(path) < 0 && errno != ENOENT) {
-			mail_index_file_set_syscall_error(index, path,
-							  "unlink()");
-		}
-	} else {
-		/* make it visible to others */
-		if (rename(path, index->filepath) < 0) {
-			mail_index_set_error(index, "rename(%s, %s) failed: %m",
-					     path, index->filepath);
-			ret = -1;
+		ret = mail_transaction_log_create(index->log);
+		created = TRUE;
+	}
+	if (ret >= 0) {
+		ret = created ? 0 : mail_index_try_open(index);
+		if (ret == 0) {
+			/* doesn't exist / corrupted */
+			index->map = mail_index_map_alloc(index);
+			index->hdr = &index->map->hdr;
 		}
 	}
-
-	mail_transaction_log_sync_unlock(index->log);
-	return ret;
-}
-
-static void mail_index_header_init(struct mail_index_header *hdr)
-{
-	time_t now = time(NULL);
-
-	i_assert((sizeof(*hdr) % sizeof(uint64_t)) == 0);
-
-	memset(hdr, 0, sizeof(*hdr));
-
-	hdr->major_version = MAIL_INDEX_MAJOR_VERSION;
-	hdr->minor_version = MAIL_INDEX_MINOR_VERSION;
-	hdr->base_header_size = sizeof(*hdr);
-	hdr->header_size = sizeof(*hdr);
-	hdr->record_size = sizeof(struct mail_index_record);
-
-#ifndef WORDS_BIGENDIAN
-	hdr->compat_flags |= MAIL_INDEX_COMPAT_LITTLE_ENDIAN;
-#endif
-
-	hdr->indexid = now;
-
-	hdr->next_uid = 1;
-}
-
-void mail_index_create_in_memory(struct mail_index *index,
-				 const struct mail_index_header *hdr)
-{
-        struct mail_index_header tmp_hdr;
-	struct mail_index_map tmp_map;
-
-	if (hdr == NULL) {
-		mail_index_header_init(&tmp_hdr);
-		hdr = &tmp_hdr;
-	}
-
-	memset(&tmp_map, 0, sizeof(tmp_map));
-	tmp_map.hdr = *hdr;
-	tmp_map.hdr_base = hdr;
-
-	/* a bit kludgy way to do this, but it initializes everything
-	   nicely and correctly */
-	index->map = mail_index_map_clone(&tmp_map, hdr->record_size);
-	index->hdr = &index->map->hdr;
-}
+	if (ret < 0) {
+		/* open/create failed, fallback to in-memory indexes */
+		if ((flags & MAIL_INDEX_OPEN_FLAG_CREATE) == 0)
+			return FALSE;
 
-/* returns -1 = error, 0 = won't create, 1 = ok */
-static int mail_index_open_files(struct mail_index *index,
-				 enum mail_index_open_flags flags)
-{
-	struct mail_index_header hdr;
-	unsigned int lock_id = 0;
-	int ret;
-	bool create = FALSE, created = FALSE;
-
-	ret = mail_index_try_open(index, &lock_id);
-	if (ret > 0)
-		hdr = *index->hdr;
-	else if (ret == 0) {
-		/* doesn't exist, or corrupted */
-		if ((flags & MAIL_INDEX_OPEN_FLAG_CREATE) == 0 &&
-		    !MAIL_INDEX_IS_IN_MEMORY(index))
-			return 0;
-		mail_index_header_init(&hdr);
-		index->hdr = &hdr;
-		create = TRUE;
-	} else if (ret < 0)
-		return -1;
-
-	index->indexid = hdr.indexid;
-
-	index->log = create ?
-		mail_transaction_log_create(index) :
-		mail_transaction_log_open_or_create(index);
-	if (index->log == NULL) {
-		if (ret == 0)
-			index->hdr = NULL;
-		return -1;
-	}
-
-	if (index->map == NULL) {
-		mail_index_header_init(&hdr);
-		index->hdr = &hdr;
-
-		/* index->indexid may be updated by transaction log opening,
-		   in case someone else had already created a new log file */
-		hdr.indexid = index->indexid;
-
-		if (lock_id != 0) {
-			mail_index_unlock(index, lock_id);
-			lock_id = 0;
-		}
-
-		if (!MAIL_INDEX_IS_IN_MEMORY(index)) {
-			if (mail_index_create(index, &hdr) < 0) {
-				/* fallback to in-memory index */
-				mail_index_move_to_memory(index);
-				mail_index_create_in_memory(index, &hdr);
-			}
-		} else {
-			mail_index_create_in_memory(index, &hdr);
-		}
-		created = TRUE;
-	}
-	i_assert(index->hdr != &hdr);
-
-	if (lock_id == 0) {
-		if (mail_index_lock_shared(index, FALSE, &lock_id) < 0)
-			return -1;
-
+		if (mail_index_move_to_memory(index) < 0)
+			return FALSE;
 	}
 
 	index->cache = created ? mail_cache_create(index) :
 		mail_cache_open_or_create(index);
-
-	mail_index_unlock(index, lock_id);
-	return 1;
+	return TRUE;
 }
 
 int mail_index_open(struct mail_index *index, enum mail_index_open_flags flags,
 		    enum file_lock_method lock_method)
 {
-	int i = 0, ret;
+	int i = 0, ret = 1;
 
 	if (index->opened) {
 		if (index->hdr != NULL &&
@@ -719,6 +461,7 @@
 			/* corrupted, reopen files */
                         mail_index_close(index);
 		} else {
+			i_assert(index->map != NULL);
 			return 1;
 		}
 	}
@@ -745,18 +488,13 @@
 			(flags & MAIL_INDEX_OPEN_FLAG_FSYNC_DISABLE) != 0;
 		index->lock_method = lock_method;
 
-		/* don't even bother to handle dotlocking without mmap being
-		   disabled. that combination simply doesn't make any sense */
-		if (lock_method == FILE_LOCK_METHOD_DOTLOCK &&
-		    !index->mmap_disable) {
-			i_fatal("lock_method=dotlock and mmap_disable=no "
-				"combination isn't supported. "
-				"You don't _really_ want it anyway.");
+		i_assert(!index->opened);
+		if (!mail_index_open_files(index, flags)) {
+			/* doesn't exist and create flag not used */
+			ret = 0;
+			break;
 		}
-
-		ret = mail_index_open_files(index, flags);
-		if (ret <= 0)
-			break;
+		i_assert(index->map != NULL);
 
 		index->opened = TRUE;
 		if (index->fsck) {
@@ -764,8 +502,10 @@
 			ret = mail_index_fsck(index);
 			if (ret == 0) {
 				/* completely broken, reopen */
-				if (i++ < 3)
+				if (i++ < 3) {
+					mail_index_close(index);
 					continue;
+				}
 				/* too many tries */
 				ret = -1;
 			}
@@ -776,17 +516,14 @@
 	if (ret <= 0)
 		mail_index_close(index);
 
+	i_assert(ret <= 0 || index->map != NULL);
 	return ret;
 }
 
-void mail_index_close(struct mail_index *index)
+static void mail_index_close_file(struct mail_index *index)
 {
-	if (index->log != NULL)
-		mail_transaction_log_close(&index->log);
 	if (index->map != NULL)
 		mail_index_unmap(index, &index->map);
-	if (index->cache != NULL)
-		mail_cache_free(&index->cache);
 	if (index->file_lock != NULL)
 		file_lock_free(&index->file_lock);
 
@@ -796,125 +533,59 @@
 		index->fd = -1;
 	}
 
+	if (index->lock_type == F_RDLCK)
+		index->lock_type = F_UNLCK;
+	index->lock_id += 2;
+	index->shared_lock_count = 0;
+}
+
+void mail_index_close(struct mail_index *index)
+{
+	mail_transaction_log_close(index->log);
+	if (index->cache != NULL)
+		mail_cache_free(&index->cache);
+	mail_index_close_file(index); /* map, file lock and fd */
 	i_free_and_null(index->filepath);
 
 	index->indexid = 0;
 	index->opened = FALSE;
 }
 
-int mail_index_reopen(struct mail_index *index, int fd)
-{
-	struct mail_index_map *old_map;
-	struct file_lock *old_file_lock;
-	unsigned int old_shared_locks, old_lock_id, lock_id = 0;
-	int ret, old_fd, old_lock_type;
-
-	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
-	i_assert(index->excl_lock_count == 0);
-
-	old_map = index->map;
-	if (old_map != NULL)
-		old_map->refcount++;
-	old_fd = index->fd;
-
-	/* new file, new locks. the old fd can keep its locks, they don't
-	   matter anymore as no-one's going to modify the file. */
-	old_lock_type = index->lock_type;
-	old_lock_id = index->lock_id;
-	old_shared_locks = index->shared_lock_count;
-	old_file_lock = index->file_lock;
-
-	if (index->lock_type == F_RDLCK)
-		index->lock_type = F_UNLCK;
-	index->lock_id += 2;
-	index->shared_lock_count = 0;
-	index->file_lock = NULL;
-
-	if (fd != -1) {
-		index->fd = fd;
-		ret = 0;
-	} else {
-		ret = mail_index_try_open_only(index);
-		if (ret > 0)
-			ret = mail_index_lock_shared(index, FALSE, &lock_id);
-		else if (ret == 0) {
-			/* index file is lost */
-			ret = -1;
-		}
-	}
-
-	if (ret == 0) {
-		/* read the new mapping. note that with mmap_disable we want
-		   to keep the old mapping in index->map so we can update it
-		   by reading transaction log. */
-		if (mail_index_map(index, TRUE) <= 0)
-			ret = -1;
-	}
-
-	if (lock_id != 0)
-		mail_index_unlock(index, lock_id);
-
-	if (ret == 0) {
-		if (old_map != NULL)
-			mail_index_unmap(index, &old_map);
-		if (old_file_lock != NULL)
-			file_lock_free(&old_file_lock);
-		if (close(old_fd) < 0)
-			mail_index_set_syscall_error(index, "close()");
-	} else {
-		if (index->map != NULL)
-			mail_index_unmap(index, &index->map);
-
-		if (index->fd != -1) {
-			if (close(index->fd) < 0)
-				mail_index_set_syscall_error(index, "close()");
-		}
-
-		index->map = old_map;
-		index->hdr = &index->map->hdr;
-		index->fd = old_fd;
-		index->file_lock = old_file_lock;
-		index->lock_type = old_lock_type;
-		index->lock_id = old_lock_id;
-		index->shared_lock_count = old_shared_locks;
-	}
-	return ret;
-}
-
-int mail_index_reopen_if_needed(struct mail_index *index)
+int mail_index_reopen_if_changed(struct mail_index *index)
 {
 	struct stat st1, st2;
 
+	i_assert(index->excl_lock_count == 0);
+
 	if (MAIL_INDEX_IS_IN_MEMORY(index))
 		return 0;
 
+	if (index->fd == -1)
+		return mail_index_try_open_only(index);
+
 	if (fstat(index->fd, &st1) < 0) {
-		if (errno == ESTALE) {
-			/* deleted, reopen */
-			if (mail_index_reopen(index, -1) < 0)
-				return -1;
-			return 1;
-		}
-		return mail_index_set_syscall_error(index, "fstat()");
+		if (errno != ESTALE)
+			return mail_index_set_syscall_error(index, "fstat()");
+		mail_index_close_file(index); /* deleted/recreated, reopen */
+		return mail_index_try_open_only(index);
 	}
 	if (nfs_safe_stat(index->filepath, &st2) < 0) {
-		mail_index_set_syscall_error(index, "stat()");
-		if (errno != ENOENT)
-			return -1;
+		if (errno == ENOENT)
+			return 0;
 
-		/* lost it? recreate later */
-		mail_index_mark_corrupted(index);
-		return -1;
+		return mail_index_set_syscall_error(index, "stat()");
 	}
 
-	if (st1.st_ino != st2.st_ino ||
-	    !CMP_DEV_T(st1.st_dev, st2.st_dev)) {
-		if (mail_index_reopen(index, -1) < 0)
-			return -1;
+	if (st1.st_ino == st2.st_ino && CMP_DEV_T(st1.st_dev, st2.st_dev)) {
+		/* the same file */
 		return 1;
-	} else {
-		return 0;
 	}
+
+	/* new file, new locks. the old fd can keep its locks, they don't
+	   matter anymore as no-one's going to modify the file. */
+	mail_index_close_file(index);
+
+	return mail_index_try_open_only(index);
 }
 
 int mail_index_refresh(struct mail_index *index)
@@ -931,19 +602,7 @@
 		return 0;
 	}
 
-	if (!index->mmap_disable) {
-		/* reopening is all we need */
-		return mail_index_reopen_if_needed(index);
-	}
-
-	i_assert(!index->mapping);
-
-	/* mail_index_map() simply reads latest changes from transaction log,
-	   which makes us fully refreshed. */
-	if (mail_index_lock_shared(index, TRUE, &lock_id) < 0)
-		return -1;
-
-	ret = mail_index_map(index, FALSE);
+	ret = mail_index_map(index, MAIL_INDEX_SYNC_HANDLER_HEAD, &lock_id);
 	mail_index_unlock(index, lock_id);
 	return ret <= 0 ? -1 : 0;
 }
@@ -1000,8 +659,7 @@
 
 	/* move index map to memory */
 	if (!MAIL_INDEX_MAP_IS_IN_MEMORY(index->map)) {
-		map = mail_index_map_clone(index->map,
-					   index->map->hdr.record_size);
+		map = mail_index_map_clone(index->map);
 		mail_index_unmap(index, &index->map);
 		index->map = map;
 		index->hdr = &map->hdr;
--- a/src/lib-index/mail-index.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-index.h	Mon Jun 11 14:50:10 2007 +0300
@@ -5,7 +5,7 @@
 #include "seq-range-array.h"
 
 #define MAIL_INDEX_MAJOR_VERSION 7
-#define MAIL_INDEX_MINOR_VERSION 0
+#define MAIL_INDEX_MINOR_VERSION 1
 
 #define MAIL_INDEX_HEADER_MIN_SIZE 120
 
@@ -71,11 +71,9 @@
 	uint32_t first_unseen_uid_lowwater;
 	uint32_t first_deleted_uid_lowwater;
 
-	/* We have internal and external sync offsets. External changes are
-	   synced into index somewhat more often, so int_offset <= ext_offset */
 	uint32_t log_file_seq;
-	uint32_t log_file_int_offset;
-	uint32_t log_file_ext_offset;
+	uint32_t log_file_index_int_offset;
+	uint32_t log_file_index_ext_offset;
 
 	uint64_t sync_size;
 	uint32_t sync_stamp;
@@ -83,6 +81,8 @@
 	/* daily first UIDs that have been added to index. */
 	uint32_t day_stamp;
 	uint32_t day_first_uid[8];
+
+	uint32_t log_file_mailbox_offset;
 };
 
 struct mail_index_record {
@@ -172,8 +172,9 @@
 bool mail_index_view_is_inconsistent(struct mail_index_view *view);
 
 /* Transaction has to be opened to be able to modify index. You can have
-   multiple transactions open simultaneously. Note that committed transactions
-   won't show up until you've synchronized mailbox (mail_index_sync_begin).
+   multiple transactions open simultaneously. Committed transactions won't
+   show up until you've synchronized the view. Expunges won't show up until
+   you've synchronized the mailbox (mail_index_sync_begin).
 
    If transaction is marked as hidden, the changes won't be listed when the
    view is synchronized. Expunges can't be hidden.
@@ -203,12 +204,11 @@
 struct mail_index_view *
 mail_index_transaction_open_updated_view(struct mail_index_transaction *t);
 
-/* Begin synchronizing mailbox with index file. This call locks the index
-   exclusively against other modifications. Returns 1 if ok, -1 if error.
+/* Begin synchronizing mailbox with index file. Returns 1 if ok, -1 if error.
 
    If log_file_seq is not (uint32_t)-1 and index is already synchronized up
-   to given log_file_offset, the synchronization isn't started and this
-   function returns 0. This should be done when you wish to sync your previous
+   to the given log_file_offset, the synchronization isn't started and this
+   function returns 0. This should be done when you wish to sync your committed
    transaction instead of doing a full mailbox synchronization.
 
    mail_index_sync_next() returns all changes from previously committed
@@ -217,7 +217,7 @@
    sync types, then they might). You must go through all of them and update
    the mailbox accordingly.
 
-   None of the changes actually show up in index until after successful
+   None of the changes actually show up in the index until after a successful
    mail_index_sync_commit().
 
    Returned sequence numbers describe the mailbox state at the beginning of
--- a/src/lib-index/mail-transaction-log-append.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log-append.c	Mon Jun 11 14:50:10 2007 +0300
@@ -1,7 +1,6 @@
 /* Copyright (C) 2003-2004 Timo Sirainen */
 
 #include "lib.h"
-#include "ioloop.h"
 #include "array.h"
 #include "buffer.h"
 #include "write-full.h"
@@ -14,6 +13,9 @@
 	struct mail_transaction_log_file *file;
 	struct mail_index_transaction *trans;
 	buffer_t *output;
+
+	uint32_t first_append_size;
+	bool sync_includes_this;
 };
 
 static void log_append_buffer(struct log_append_context *ctx,
@@ -41,10 +43,10 @@
 					       (hdr_buf == NULL ? 0 :
 						hdr_buf->used));
 	if (!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(ctx->file) &&
-	    ctx->file->first_append_size == 0) {
+	    ctx->first_append_size == 0) {
 		/* size will be written later once everything
 		   is in disk */
-		ctx->file->first_append_size = hdr_size;
+		ctx->first_append_size = hdr_size;
 	} else {
 		hdr.size = hdr_size;
 	}
@@ -87,7 +89,7 @@
 		return 0;
 	}
 
-	i_assert(file->first_append_size != 0);
+	i_assert(ctx->first_append_size != 0);
 	if (pwrite_full(file->fd, ctx->output->data, ctx->output->used,
 			file->sync_offset) < 0) {
 		/* write failure, fallback to in-memory indexes. */
@@ -97,15 +99,32 @@
 		return log_buffer_move_to_memory(ctx);
 	}
 
+	i_assert(!ctx->sync_includes_this ||
+		 file->sync_offset + ctx->output->used ==
+		 file->mailbox_sync_max_offset);
+
+	if (!file->log->index->fsync_disable && fdatasync(file->fd) < 0) {
+		mail_index_file_set_syscall_error(file->log->index,
+						  file->filepath,
+						  "fsync()");
+		return log_buffer_move_to_memory(ctx);
+	}
+
 	/* now that the whole transaction has been written, rewrite the first
 	   record's size so the transaction becomes visible */
-	if (pwrite_full(file->fd, &file->first_append_size,
+	if (pwrite_full(file->fd, &ctx->first_append_size,
 			sizeof(uint32_t), file->sync_offset) < 0) {
 		mail_index_file_set_syscall_error(file->log->index,
 						  file->filepath,
 						  "pwrite_full()");
 		return log_buffer_move_to_memory(ctx);
 	}
+
+	/* FIXME: when we're relying on O_APPEND and someone else wrote a
+	   transaction, we'll need to wait for it to commit its transaction.
+	   if it crashes before doing that, we'll need to overwrite it with
+	   a dummy record */
+
 	file->sync_offset += ctx->output->used;
 	return 0;
 }
@@ -344,16 +363,36 @@
 	}
 }
 
-#define LOG_WANT_ROTATE(file) \
-	(((file)->sync_offset > MAIL_TRANSACTION_LOG_ROTATE_MIN_SIZE && \
-	  (time_t)(file)->hdr.create_stamp < \
-	   ioloop_time - MAIL_TRANSACTION_LOG_ROTATE_TIME) || \
-	 ((file)->sync_offset > MAIL_TRANSACTION_LOG_ROTATE_MAX_SIZE))
+static void log_append_sync_offset_if_needed(struct log_append_context *ctx)
+{
+	struct mail_transaction_header_update *u;
+	buffer_t *buf;
+	uint32_t offset;
 
-#define ARE_ALL_TRANSACTIONS_IN_INDEX(log, idx_hdr) \
-	((log)->head->hdr.file_seq == (idx_hdr)->log_file_seq && \
-	 (log)->head->sync_offset == (idx_hdr)->log_file_int_offset && \
-	 (log)->head->sync_offset == (idx_hdr)->log_file_ext_offset)
+	if (ctx->file->mailbox_sync_max_offset == ctx->file->sync_offset) {
+		/* FIXME: when we remove exclusive log locking, we
+		   can't rely on this. then write non-changed offset + check
+		   real offset + rewrite the new offset if other transactions
+		   weren't written in the middle */
+		ctx->file->mailbox_sync_max_offset += ctx->output->used +
+			sizeof(struct mail_transaction_header) +
+			sizeof(*u) + sizeof(offset);
+		ctx->sync_includes_this = TRUE;
+	}
+	offset = ctx->file->mailbox_sync_max_offset;
+	/* mailbox_sync_saved_offset already equals it: nothing to log */
+	if (ctx->file->mailbox_sync_saved_offset == offset)
+		return;
+	/* header update record that sets hdr.log_file_mailbox_offset */
+	buf = buffer_create_static_hard(pool_datastack_create(),
+					sizeof(*u) + sizeof(offset));
+	u = buffer_append_space_unsafe(buf, sizeof(*u));
+	u->offset = offsetof(struct mail_index_header, log_file_mailbox_offset);
+	u->size = sizeof(offset);
+	buffer_append(buf, &offset, sizeof(offset));
+
+	log_append_buffer(ctx, buf, NULL, MAIL_TRANSACTION_HEADER_UPDATE);
+}
 
 static int
 mail_transaction_log_append_locked(struct mail_index_transaction *t,
@@ -364,10 +403,8 @@
 	struct mail_index *index;
 	struct mail_transaction_log *log;
 	struct mail_transaction_log_file *file;
-	struct mail_index_header idx_hdr;
 	struct log_append_context ctx;
 	uoff_t append_offset;
-	unsigned int lock_id;
 
 	index = mail_index_view_get_index(view);
 	log = index->log;
@@ -380,34 +417,7 @@
 			return -1;
 	}
 
-	if (LOG_WANT_ROTATE(log->head) &&
-	    ARE_ALL_TRANSACTIONS_IN_INDEX(log, index->hdr)) {
-		/* we might want to rotate, but check first that everything is
-		   synced in index. */
-		if (mail_index_lock_shared(index, TRUE, &lock_id) < 0)
-			return -1;
-
-		/* we need the latest log_file_*_offsets. It's important to
-		   use this function instead of mail_index_map() as it may
-		   have generated them by reading log files. */
-		if (mail_index_get_latest_header(index, &idx_hdr) <= 0) {
-			mail_index_unlock(index, lock_id);
-			return -1;
-		}
-		mail_index_unlock(index, lock_id);
-
-		if (ARE_ALL_TRANSACTIONS_IN_INDEX(log, &idx_hdr)) {
-			/* if rotation fails because there's not enough disk
-			   space, just continue. we'll probably move to
-			   in-memory indexes then. */
-			if (mail_transaction_log_rotate(log, TRUE) < 0 &&
-			    !index->nodiskspace)
-				return -1;
-		}
-	}
-
 	file = log->head;
-	file->first_append_size = 0;
 
 	if (file->sync_offset < file->buffer_offset)
 		file->sync_offset = file->buffer_offset;
@@ -456,6 +466,12 @@
 				  NULL, MAIL_TRANSACTION_HEADER_UPDATE);
 	}
 
+	/* NOTE: mailbox sync offset update must be the last change.
+	   it may update the sync offset to include this transaction, so it
+	   needs to know this transaction's size */
+	if (t->external)
+		log_append_sync_offset_if_needed(&ctx);
+
 	if (file->sync_offset < file->last_size) {
 		/* there is some garbage at the end of the transaction log
 		   (eg. previous write failed). remove it so reader doesn't
--- a/src/lib-index/mail-transaction-log-file.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log-file.c	Mon Jun 11 14:50:10 2007 +0300
@@ -12,6 +12,7 @@
 #include "mail-transaction-log-private.h"
 
 #define LOG_PREFETCH 1024
+#define MEMORY_LOG_NAME "(in-memory transaction log file)"
 
 void
 mail_transaction_log_file_set_corrupted(struct mail_transaction_log_file *file,
@@ -21,13 +22,13 @@
 
 	file->hdr.indexid = 0;
 	if (!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file)) {
-		if (pwrite_full(file->fd, &file->hdr.indexid,
+		/*FIXME:if (pwrite_full(file->fd, &file->hdr.indexid,
 				sizeof(file->hdr.indexid),
 				offsetof(struct mail_transaction_log_header,
 					 indexid)) < 0) {
 			mail_index_file_set_syscall_error(file->log->index,
 				file->filepath, "pwrite()");
-		}
+		}*/
 	}
 
 	va_start(va, fmt);
@@ -58,11 +59,14 @@
 	return file;
 }
 
-void mail_transaction_log_file_free(struct mail_transaction_log_file *file)
+void mail_transaction_log_file_free(struct mail_transaction_log_file **_file)
 {
+	struct mail_transaction_log_file *file = *_file;
 	struct mail_transaction_log_file **p;
 	int old_errno = errno;
 
+	*_file = NULL;
+
 	mail_transaction_log_file_unlock(file);
 
 	for (p = &file->log->files; *p != NULL; p = &(*p)->next) {
@@ -100,19 +104,21 @@
         errno = old_errno;
 }
 
-void
+static void
 mail_transaction_log_file_add_to_list(struct mail_transaction_log_file *file)
 {
 	struct mail_transaction_log *log = file->log;
 	struct mail_transaction_log_file **p;
+	struct mail_index_map *map = log->index->map;
 
-	if (log->index->map != NULL &&
-	    file->hdr.file_seq == log->index->map->hdr.log_file_seq &&
-	    log->index->map->hdr.log_file_int_offset != 0) {
+	if (map != NULL && file->hdr.file_seq == map->hdr.log_file_seq &&
+	    map->hdr.log_file_index_int_offset != 0) {
 		/* we can get a valid log offset from index file. initialize
 		   sync_offset from it so we don't have to read the whole log
 		   file from beginning. */
-		file->sync_offset = log->index->map->hdr.log_file_int_offset;
+		file->sync_offset = map->hdr.log_file_index_int_offset;
+		file->mailbox_sync_saved_offset =
+			map->hdr.log_file_mailbox_offset;
 	} else {
 		file->sync_offset = file->hdr.hdr_size;
 	}
@@ -133,7 +139,10 @@
 			      struct mail_transaction_log_header *hdr)
 {
 	struct mail_index *index = log->index;
-	unsigned int lock_id;
+	unsigned int lock_id = 0;
+
+	if (log->index->indexid == 0)
+		log->index->indexid = ioloop_time;
 
 	memset(hdr, 0, sizeof(*hdr));
 	hdr->major_version = MAIL_TRANSACTION_LOG_MAJOR_VERSION;
@@ -144,16 +153,17 @@
 
 	if (index->fd != -1) {
 		/* not creating index - make sure we have latest header */
-		if (mail_index_lock_shared(index, TRUE, &lock_id) < 0)
-			return -1;
-		if (mail_index_map(index, FALSE) <= 0) {
-			mail_index_unlock(index, lock_id);
+		if (mail_index_map(index, MAIL_INDEX_SYNC_HANDLER_HEAD,
+				   &lock_id) <= 0)
 			return -1;
-		}
 	}
-	hdr->prev_file_seq = index->hdr->log_file_seq;
-	hdr->prev_file_offset = index->hdr->log_file_int_offset;
-	hdr->file_seq = index->hdr->log_file_seq+1;
+	if (index->hdr != NULL) {
+		hdr->prev_file_seq = index->hdr->log_file_seq;
+		hdr->prev_file_offset = index->hdr->log_file_index_int_offset;
+		hdr->file_seq = index->hdr->log_file_seq + 1;
+	} else {
+		hdr->file_seq = 1;
+	}
 
 	if (index->fd != -1)
 		mail_index_unlock(index, lock_id);
@@ -170,11 +180,7 @@
 {
 	struct mail_transaction_log_file *file;
 
-	file = i_new(struct mail_transaction_log_file, 1);
-	file->log = log;
-	file->filepath = i_strdup("(in-memory transaction log file)");
-	file->fd = -1;
-
+	file = mail_transaction_log_file_alloc(log, MEMORY_LOG_NAME);
 	if (mail_transaction_log_init_hdr(log, &file->hdr) < 0) {
 		i_free(file);
 		return NULL;
@@ -300,12 +306,12 @@
 
 static int
 mail_transaction_log_file_read_hdr(struct mail_transaction_log_file *file,
-				   int head, bool ignore_estale)
+				   bool ignore_estale)
 {
         struct mail_transaction_log_file *f;
 	int ret;
 
-	i_assert(!MAIL_INDEX_IS_IN_MEMORY(file->log->index));
+	i_assert(!MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file));
 
 	ret = pread_full(file->fd, &file->hdr, sizeof(file->hdr), 0);
 	if (ret < 0) {
@@ -363,29 +369,13 @@
 
 	/* make sure we already don't have a file with the same sequence
 	   opened. it shouldn't happen unless the old log file was
-	   corrupted.
-
-	   If we're opening head log file, make sure the sequence is larger
-	   than any existing one. */
-	if (head) {
-		for (f = file->log->files; f != NULL; f = f->next) {
-			if (f->hdr.file_seq >= file->hdr.file_seq) {
-				mail_transaction_log_file_set_corrupted(file,
-					"invalid new transaction log sequence "
-					"(%u >= %u)",
-					f->hdr.file_seq, file->hdr.file_seq);
-				return 0;
-			}
-		}
-	} else {
-		for (f = file->log->files; f != NULL; f = f->next) {
-			if (f->hdr.file_seq == file->hdr.file_seq) {
-				mail_transaction_log_file_set_corrupted(file,
-					"old transaction log already opened "
-					"(%u == %u)",
-					f->hdr.file_seq, file->hdr.file_seq);
-				return 0;
-			}
+	   corrupted. */
+	for (f = file->log->files; f != NULL; f = f->next) {
+		if (f->hdr.file_seq == file->hdr.file_seq) {
+			mail_transaction_log_file_set_corrupted(file,
+				"old transaction log already opened (%u == %u)",
+				f->hdr.file_seq, file->hdr.file_seq);
+			return 0;
 		}
 	}
 
@@ -393,71 +383,112 @@
 }
 
 static int
+mail_transaction_log_file_stat(struct mail_transaction_log_file *file,
+			       bool ignore_estale)
+{
+	struct stat st;
+	/* -1 on fstat() failure; ESTALE is not logged if ignore_estale */
+	if (fstat(file->fd, &st) < 0) {
+                if (errno != ESTALE || !ignore_estale) {
+			mail_index_file_set_syscall_error(file->log->index,
+				file->filepath, "fstat()");
+                }
+		return -1;
+	}
+	/* cache device, inode, mtime and size from the fstat() result */
+	file->st_dev = st.st_dev;
+	file->st_ino = st.st_ino;
+	file->last_mtime = st.st_mtime;
+	file->last_size = st.st_size;
+	return 0;
+}
+
+static bool
+mail_transaction_log_file_is_dupe(struct mail_transaction_log_file *file)
+{
+	struct mail_transaction_log_file *tmp;
+	/* TRUE if a file in log->files has the same inode and device */
+	for (tmp = file->log->files; tmp != NULL; tmp = tmp->next) {
+		if (tmp->st_ino == file->st_ino &&
+		    CMP_DEV_T(tmp->st_dev, file->st_dev))
+			return TRUE;
+	}
+	return FALSE;
+}
+
+static int
 mail_transaction_log_file_create2(struct mail_transaction_log_file *file,
-				  bool lock, int new_fd,
-				  struct dotlock **dotlock,
-				  dev_t dev, ino_t ino, uoff_t file_size)
+				  int new_fd, struct dotlock **dotlock)
 {
 	struct mail_index *index = file->log->index;
-	struct mail_transaction_log_header hdr;
 	struct stat st;
 	const char *path2;
-	int old_fd, ret;
+	int fd, ret;
 	bool rename_existing;
 
-	i_assert(!lock || file->log->head->locked);
-
-	/* log creation is locked now - see if someone already created it */
-	if (lock) {
-		/* don't even bother checking the existing file, but rename it
-		   if it exists */
+	/* log creation is locked now - see if someone already created it.
+	   note that if we're rotating, we need to keep the log locked until
+	   the file has been rewritten. and because fcntl() locks are stupid,
+	   if we go and open()+close() the file and we had it already opened,
+	   its locks are lost. so we use stat() to check if the file has been
+	   recreated, although it almost never is. */
+	if (nfs_safe_stat(file->filepath, &st) < 0) {
+		if (errno != ENOENT) {
+			mail_index_file_set_syscall_error(index, file->filepath,
+							  "stat()");
+			return -1;
+		}
+		rename_existing = FALSE;
+	} else if (st.st_ino == file->st_ino &&
+		   CMP_DEV_T(st.st_dev, file->st_dev) &&
+		   /* inode/dev checks are enough when we're rotating the file,
+		      but not when we're replacing a broken log file */
+		   st.st_mtime == file->last_mtime &&
+		   (uoff_t)st.st_size == file->last_size) {
+		/* no-one else recreated the file */
 		rename_existing = TRUE;
-	} else if ((old_fd = nfs_safe_open(file->filepath, O_RDWR)) != -1) {
-		if ((ret = fstat(old_fd, &st)) < 0) {
-                        mail_index_file_set_syscall_error(index, file->filepath,
-                                                          "fstat()");
-		} else if (st.st_ino == ino && CMP_DEV_T(st.st_dev, dev) &&
-			   (uoff_t)st.st_size == file_size) {
-			/* same file, still broken */
+	} else {
+		/* recreated. use the file if its header is ok */
+		fd = nfs_safe_open(file->filepath, O_RDWR);
+		if (fd == -1) {
+			if (errno != ENOENT) {
+				mail_index_file_set_syscall_error(index,
+					file->filepath, "open()");
+				return -1;
+			}
 		} else {
-                        /* file changed, use the new file */
-			(void)file_dotlock_delete(dotlock);
-			file->fd = old_fd;
-			return 0;
+			file->fd = fd;
+			if (mail_transaction_log_file_read_hdr(file,
+							       FALSE) == 0) {
+				/* yes, it was ok */
+				(void)file_dotlock_delete(dotlock);
+				return 0;
+			}
+			file->fd = -1;
+			if (close(fd) < 0) {
+				mail_index_file_set_syscall_error(index,
+					file->filepath, "close()");
+			}
 		}
-
-		(void)close(old_fd);
-		old_fd = -1;
-
-                if (ret < 0) {
-                        /* fstat() failure, return after closing fd.. */
-                        return -1;
-                }
-		rename_existing = TRUE;
-	} else if (errno != ENOENT) {
-		mail_index_file_set_syscall_error(index, file->filepath,
-						  "open()");
-		return -1;
-	} else {
 		rename_existing = FALSE;
 	}
 
-	if (mail_transaction_log_init_hdr(file->log, &hdr) < 0)
+	if (mail_transaction_log_init_hdr(file->log, &file->hdr) < 0)
 		return -1;
 
-	if (write_full(new_fd, &hdr, sizeof(hdr)) < 0) {
+	if (write_full(new_fd, &file->hdr, sizeof(file->hdr)) < 0) {
 		mail_index_file_set_syscall_error(index, file->filepath,
 						  "write_full()");
 		return -1;
 	}
 
-	if (lock) {
-		file->fd = new_fd;
-		ret = mail_transaction_log_file_lock(file);
-		file->fd = -1;
-		if (ret < 0)
-			return -1;
-	}
+	file->fd = new_fd;
+	ret = mail_transaction_log_file_stat(file, FALSE);
+
+	/* if we return -1 the dotlock deletion code closes the fd */
+	file->fd = -1;
+	if (ret < 0)
+		return -1;
 
 	/* keep two log files */
 	if (rename_existing) {
@@ -487,33 +518,19 @@
 
 	/* success */
 	file->fd = new_fd;
+        mail_transaction_log_file_add_to_list(file);
 	return 0;
 }
 
-int mail_transaction_log_file_create(struct mail_transaction_log_file *file,
-				     bool lock, dev_t dev, ino_t ino,
-				     uoff_t file_size)
+int mail_transaction_log_file_create(struct mail_transaction_log_file *file)
 {
 	struct mail_index *index = file->log->index;
 	struct dotlock *dotlock;
-	struct stat st;
 	mode_t old_mask;
 	int fd;
 
 	i_assert(!MAIL_INDEX_IS_IN_MEMORY(index));
 
-	if (stat(index->dir, &st) < 0) {
-		if (ENOTFOUND(errno)) {
-			/* the whole index directory was deleted, which means
-			   the mailbox was deleted by another process.
-			   fail silently. */
-			mail_index_mark_corrupted(index);
-			return -1;
-		}
-		mail_index_file_set_syscall_error(index, index->dir, "stat()");
-		return -1;
-	}
-
 	/* With dotlocking we might already have path.lock created, so this
 	   filename has to be different. */
 	old_mask = umask(index->mode ^ 0666);
@@ -537,8 +554,7 @@
 
         /* either fd gets used or the dotlock gets deleted and returned fd
            is for the existing file */
-        if (mail_transaction_log_file_create2(file, lock, fd, &dotlock,
-					      dev, ino, file_size) < 0) {
+        if (mail_transaction_log_file_create2(file, fd, &dotlock) < 0) {
 		if (dotlock != NULL)
 			(void)file_dotlock_delete(&dotlock);
 		return -1;
@@ -546,142 +562,124 @@
 	return 0;
 }
 
-int mail_transaction_log_file_fd_open(struct mail_transaction_log_file *file,
-				      bool head, bool ignore_estale)
+int mail_transaction_log_file_open(struct mail_transaction_log_file *file,
+				   bool check_existing)
 {
-	struct stat st;
-
-	i_assert(!MAIL_INDEX_IS_IN_MEMORY(file->log->index));
-
-	if (fstat(file->fd, &st) < 0) {
-                if (errno != ESTALE || !ignore_estale) {
-			mail_index_file_set_syscall_error(file->log->index,
-							  file->filepath,
-							  "fstat()");
-                }
-		return -1;
-	}
-
-	file->st_dev = st.st_dev;
-	file->st_ino = st.st_ino;
-	file->last_mtime = st.st_mtime;
-	file->last_size = st.st_size;
-
-	return mail_transaction_log_file_read_hdr(file, head, ignore_estale);
-}
-
-int mail_transaction_log_file_fd_open_or_create(struct mail_transaction_log_file
-						*file, bool try_retry)
-{
+        unsigned int i;
+	bool ignore_estale;
 	int ret;
 
-	ret = mail_transaction_log_file_fd_open(file, TRUE, !try_retry);
-	if (ret == 0) {
-		/* corrupted header, recreate the file */
-		if (mail_transaction_log_file_create(file, FALSE,
-						     file->st_dev,
-						     file->st_ino,
-						     file->last_size) < 0)
-			ret = -1;
-		else {
-			ret = mail_transaction_log_file_fd_open(file, TRUE,
-								FALSE);
-			if (ret == 0) {
-				/* newly created transaction log corrupted */
-				return -1;
-			}
-		}
-	}
-	if (ret < 0)
-		return errno == ENOENT && try_retry ? 0 : -1;
-
-        mail_transaction_log_file_add_to_list(file);
-	return 1;
-}
+        for (i = 0;; i++) {
+                file->fd = nfs_safe_open(file->filepath, O_RDWR);
+                if (file->fd == -1) {
+			if (errno == ENOENT)
+				return 0;
 
-struct mail_transaction_log_file *
-mail_transaction_log_file_open_or_create(struct mail_transaction_log *log,
-					 const char *path)
-{
-        struct mail_transaction_log_file *file;
-        unsigned int i;
-	int ret;
-
-	if (MAIL_INDEX_IS_IN_MEMORY(log->index))
-		return mail_transaction_log_file_alloc_in_memory(log);
-
-	file = mail_transaction_log_file_alloc(log, path);
-
-        for (i = 0; ; i++) {
-                file->fd = nfs_safe_open(path, O_RDWR);
-                if (file->fd == -1) {
-                        if (errno != ENOENT) {
-				mail_index_file_set_syscall_error(log->index,
-                                                                  path,
-                                                                  "open()");
-				break;
-                        }
-
-                        /* doesn't exist, try creating it */
-			if (mail_transaction_log_file_create(file, FALSE,
-							     0, 0, 0) < 0)
-				break;
+			mail_index_file_set_syscall_error(file->log->index,
+				file->filepath, "open()");
+			return -1;
                 }
 
-		ret = mail_transaction_log_file_fd_open_or_create(file,
-                		i == MAIL_INDEX_ESTALE_RETRY_COUNT);
-		if (ret > 0)
-			return file;
-		if (ret < 0)
-			break;
-
-                /* ESTALE - retry */
-	}
-
-	mail_transaction_log_file_free(file);
-	return NULL;
-}
-
-struct mail_transaction_log_file *
-mail_transaction_log_file_open(struct mail_transaction_log *log,
-			       const char *path)
-{
-	struct mail_transaction_log_file *file;
-        unsigned int i;
-        int ret;
-
-	file = mail_transaction_log_file_alloc(log, path);
-        for (i = 0;; i++) {
-                file->fd = nfs_safe_open(path, O_RDWR);
-                if (file->fd == -1) {
-                        mail_index_file_set_syscall_error(log->index, path,
-                                                          "open()");
-			break;
-                }
-
-		ret = mail_transaction_log_file_fd_open(file,
-                		TRUE, i < MAIL_INDEX_ESTALE_RETRY_COUNT);
+		ignore_estale = i < MAIL_INDEX_ESTALE_RETRY_COUNT;
+		if (mail_transaction_log_file_stat(file, ignore_estale) < 0)
+			ret = -1;
+		else if (check_existing &&
+			 mail_transaction_log_file_is_dupe(file))
+			return 0;
+		else {
+			ret = mail_transaction_log_file_read_hdr(file,
+								 ignore_estale);
+		}
 		if (ret > 0) {
 			/* success */
-			mail_transaction_log_file_add_to_list(file);
-			return file;
+			break;
 		}
 
 		if (ret == 0) {
 			/* corrupted */
-			break;
+			if (unlink(file->filepath) < 0 && errno != ENOENT) {
+				mail_index_set_error(file->log->index,
+						     "unlink(%s) failed: %m",
+						     file->filepath);
+			}
+			return 0;
 		}
 		if (errno != ESTALE ||
 		    i == MAIL_INDEX_ESTALE_RETRY_COUNT) {
 			/* syscall error */
-			break;
+			return -1;
 		}
 
 		/* ESTALE - try again */
         }
 
-	mail_transaction_log_file_free(file);
-	return NULL;
+	mail_transaction_log_file_add_to_list(file);
+	return 1;
+}
+
+static int
+log_file_track_mailbox_sync_offset_hdr(struct mail_transaction_log_file *file,
+				       const void *data, unsigned int size)
+{
+	const struct mail_transaction_header_update *u = data;
+	const struct mail_index_header *ihdr;
+	const unsigned int offset_pos =
+		offsetof(struct mail_index_header, log_file_mailbox_offset);
+	const unsigned int offset_size = sizeof(ihdr->log_file_mailbox_offset);
+	uint32_t sync_offset;
+
+	i_assert(offset_size == sizeof(sync_offset));
+
+	if (size < sizeof(*u) || size < sizeof(*u) + u->size) {
+		mail_transaction_log_file_set_corrupted(file,
+			"header update extends beyond record size");
+		return -1;
+	}
+
+	if (u->offset <= offset_pos &&
+	    u->offset + u->size >= offset_pos + offset_size) {
+		memcpy(&sync_offset,
+		       CONST_PTR_OFFSET(u + 1, offset_pos - u->offset),
+		       sizeof(sync_offset));
+
+		if (sync_offset < file->mailbox_sync_saved_offset) {
+			mail_transaction_log_file_set_corrupted(file,
+				"mailbox_sync_offset shrinked");
+			return -1;
+		}
+		file->mailbox_sync_saved_offset = sync_offset;
+		if (sync_offset > file->mailbox_sync_max_offset)
+			file->mailbox_sync_max_offset = sync_offset;
+		return 1;
+	}
+	return 0;
+}
+
+static int
+log_file_track_mailbox_sync_offset(struct mail_transaction_log_file *file,
+				   const struct mail_transaction_header *hdr,
+				   unsigned int trans_size)
+{
+	int ret;
+
+	i_assert((hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0);
+
+	if ((hdr->type & MAIL_TRANSACTION_HEADER_UPDATE) != 0) {
+		/* see if this updates mailbox_sync_offset */
+		ret = log_file_track_mailbox_sync_offset_hdr(file, hdr + 1,
+							     trans_size -
+							     sizeof(*hdr));
+		if (ret != 0)
+			return ret < 0 ? -1 : 0;
+	}
+
+	if (file->mailbox_sync_max_offset == file->sync_offset) {
+		/* external transactions aren't synced to mailbox. we can
+		   update mailbox sync offset to skip this transaction to
+		   avoid re-reading it at the next sync. */
+		file->mailbox_sync_max_offset += trans_size;
+	}
+	return 0;
 }
 
 static int
@@ -690,7 +688,7 @@
         const struct mail_transaction_header *hdr;
 	const void *data;
 	size_t size, avail;
-	uint32_t hdr_size = 0;
+	uint32_t trans_size = 0;
 
 	data = buffer_get_data(file->buffer, &size);
 
@@ -700,20 +698,27 @@
 	while (file->sync_offset - file->buffer_offset + sizeof(*hdr) <= size) {
 		hdr = CONST_PTR_OFFSET(data, file->sync_offset -
 				       file->buffer_offset);
-		hdr_size = mail_index_offset_to_uint32(hdr->size);
-		if (hdr_size == 0) {
+		trans_size = mail_index_offset_to_uint32(hdr->size);
+		if (trans_size == 0) {
 			/* unfinished */
 			return 0;
 		}
-		if (hdr_size < sizeof(*hdr)) {
+		if (trans_size < sizeof(*hdr)) {
 			mail_transaction_log_file_set_corrupted(file,
-				"hdr.size too small (%u)", hdr_size);
+				"hdr.size too small (%u)", trans_size);
 			return -1;
 		}
 
-		if (file->sync_offset - file->buffer_offset + hdr_size > size)
+		if (file->sync_offset - file->buffer_offset + trans_size > size)
 			break;
-		file->sync_offset += hdr_size;
+
+		/* transaction has been fully written */
+		if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0) {
+			if (log_file_track_mailbox_sync_offset(file, hdr,
+							       trans_size) < 0)
+				return -1;
+		}
+		file->sync_offset += trans_size;
 	}
 
 	avail = file->sync_offset - file->buffer_offset;
@@ -723,15 +728,16 @@
 		   memory, it may be just that the memory was updated
 		   after we checked the file size. */
 		if (file->locked || file->mmap_base == NULL) {
-			if (hdr_size != 0) {
+			if (trans_size != 0) {
 				mail_transaction_log_file_set_corrupted(file,
-					"hdr.size too large (%u)", hdr_size);
+					"hdr.size too large (%u)", trans_size);
 			} else {
 				mail_transaction_log_file_set_corrupted(file,
 					"Unexpected garbage at EOF");
 			}
 			return -1;
 		}
+		// FIXME: here we probably want to flush NFS data cache
 	}
 	return 0;
 }
@@ -839,6 +845,13 @@
 	if (MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file))
 		return 1;
 
+	if (index->log_locked && file == file->log->head &&
+	    end_offset == (uoff_t)-1) {
+		/* we're not interested in going further than sync_offset */
+		i_assert(start_offset <= file->sync_offset);
+		end_offset = file->sync_offset;
+	}
+
 	if (file->buffer != NULL && file->buffer_offset <= start_offset) {
 		/* see if we already have it */
 		size = buffer_get_used_size(file->buffer);
--- a/src/lib-index/mail-transaction-log-private.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log-private.h	Mon Jun 11 14:50:10 2007 +0300
@@ -4,13 +4,23 @@
 #include "file-dotlock.h"
 #include "mail-transaction-log.h"
 
+#if 0 // FIXME
 /* Rotate when log is older than ROTATE_TIME and larger than MIN_SIZE */
-#define MAIL_TRANSACTION_LOG_ROTATE_MIN_SIZE (1024*128)
+#define MAIL_TRANSACTION_LOG_ROTATE_MIN_SIZE (1024*4)
+/* If log is larger than MAX_SIZE, rotate regardless of the time */
+#define MAIL_TRANSACTION_LOG_ROTATE_MAX_SIZE (1024*16)
+#define MAIL_TRANSACTION_LOG_ROTATE_TIME (30)
+
+#define MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file) ((file)->fd == -1)
+#else
+/* Rotate when log is older than ROTATE_TIME and larger than MIN_SIZE */
+#define MAIL_TRANSACTION_LOG_ROTATE_MIN_SIZE (1024*256)
 /* If log is larger than MAX_SIZE, rotate regardless of the time */
 #define MAIL_TRANSACTION_LOG_ROTATE_MAX_SIZE (1024*1024)
 #define MAIL_TRANSACTION_LOG_ROTATE_TIME (60*5)
 
 #define MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file) ((file)->fd == -1)
+#endif
 
 struct mail_transaction_log_file {
 	struct mail_transaction_log *log;
@@ -28,14 +38,20 @@
 	time_t last_mtime;
 	uoff_t last_size;
 
+	struct mail_transaction_log_header hdr;
 	buffer_t *buffer;
 	uoff_t buffer_offset;
 	void *mmap_base;
 	size_t mmap_size;
 
-	struct mail_transaction_log_header hdr;
+	/* points to the next uncommitted transaction. usually same as EOF. */
 	uoff_t sync_offset;
-	uint32_t first_append_size;
+	/* saved_offset is the offset that was last written to the transaction
+	   log. max_offset is what should be written to the log the next time
+	   a transaction is written. transaction log handling may update
+	   max_offset automatically by making it skip external transactions
+	   after the last saved offset (to avoid re-reading them needlessly). */
+	uoff_t mailbox_sync_saved_offset, mailbox_sync_max_offset;
 
 	struct file_lock *file_lock;
 
@@ -45,10 +61,14 @@
 struct mail_transaction_log {
 	struct mail_index *index;
         struct mail_transaction_log_view *views;
+
 	/* files is a linked list of all the opened log files. the list is
 	   sorted by the log file sequence, so that transaction views can use
 	   them easily. head contains a pointer to the newest log file. */
 	struct mail_transaction_log_file *files, *head;
+	/* open_file is used temporarily while opening the log file.
+	   if _open() failed, it's left there for _create(). */
+	struct mail_transaction_log_file *open_file;
 
 	unsigned int dotlock_count;
         struct dotlock_settings dotlock_settings, new_dotlock_settings;
@@ -61,33 +81,20 @@
 	__attr_format__(2, 3);
 
 struct mail_transaction_log_file *
+mail_transaction_log_file_alloc_in_memory(struct mail_transaction_log *log);
+struct mail_transaction_log_file *
 mail_transaction_log_file_alloc(struct mail_transaction_log *log,
 				const char *path);
-struct mail_transaction_log_file *
-mail_transaction_log_file_alloc_in_memory(struct mail_transaction_log *log);
-void mail_transaction_log_file_free(struct mail_transaction_log_file *file);
+void mail_transaction_log_file_free(struct mail_transaction_log_file **file);
 
-struct mail_transaction_log_file *
-mail_transaction_log_file_open(struct mail_transaction_log *log,
-			       const char *path);
-struct mail_transaction_log_file *
-mail_transaction_log_file_open_or_create(struct mail_transaction_log *log,
-					 const char *path);
-int mail_transaction_log_file_create(struct mail_transaction_log_file *file,
-				     bool lock, dev_t dev, ino_t ino,
-				     uoff_t file_size);
-
-int mail_transaction_log_file_fd_open(struct mail_transaction_log_file *file,
-				      bool head, bool ignore_estale);
-int mail_transaction_log_file_fd_open_or_create(struct mail_transaction_log_file
-						*file, bool try_retry);
+int mail_transaction_log_file_open(struct mail_transaction_log_file *file,
+				   bool check_existing);
+int mail_transaction_log_file_create(struct mail_transaction_log_file *file);
 int mail_transaction_log_file_read(struct mail_transaction_log_file *file,
 				   uoff_t offset);
 int mail_transaction_log_file_lock(struct mail_transaction_log_file *file);
-void
-mail_transaction_log_file_add_to_list(struct mail_transaction_log_file *file);
 
-int mail_transaction_log_file_find(struct mail_transaction_log *log,
+int mail_transaction_log_find_file(struct mail_transaction_log *log,
 				   uint32_t file_seq,
 				   struct mail_transaction_log_file **file_r);
 
@@ -96,7 +103,8 @@
 
 void mail_transaction_logs_clean(struct mail_transaction_log *log);
 
-int mail_transaction_log_rotate(struct mail_transaction_log *log, bool lock);
+bool mail_transaction_log_want_rotate(struct mail_transaction_log *log);
+int mail_transaction_log_rotate(struct mail_transaction_log *log);
 int mail_transaction_log_lock_head(struct mail_transaction_log *log);
 void mail_transaction_log_file_unlock(struct mail_transaction_log_file *file);
 
--- a/src/lib-index/mail-transaction-log-view.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log-view.c	Mon Jun 11 14:50:10 2007 +0300
@@ -111,8 +111,14 @@
 	}
 
 	if (min_file_seq == 0) {
-		/* new index, transaction file not synced yet */
-		min_file_seq = 1;
+		/* index file doesn't exist yet. this transaction log should
+		   start from the beginning */
+		if (view->log->files->hdr.prev_file_seq != 0) {
+			/* but it doesn't */
+			return 0;
+		}
+
+		min_file_seq = view->log->files->hdr.file_seq;
 		min_file_offset = 0;
 
 		if (max_file_seq == 0) {
@@ -135,7 +141,7 @@
 	}
 
 	/* find the oldest log file first. */
-	ret = mail_transaction_log_file_find(view->log, min_file_seq, &file);
+	ret = mail_transaction_log_find_file(view->log, min_file_seq, &file);
 	if (ret <= 0)
 		return ret;
 
@@ -169,7 +175,7 @@
 		file = file->next;
 		if (file == NULL || file->hdr.file_seq != seq) {
 			/* see if we could find the missing file */
-			ret = mail_transaction_log_file_find(view->log,
+			ret = mail_transaction_log_find_file(view->log,
 							     seq, &file);
 			if (ret <= 0) {
 				if (ret < 0)
--- a/src/lib-index/mail-transaction-log.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log.c	Mon Jun 11 14:50:10 2007 +0300
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2004 Timo Sirainen */
+/* Copyright (C) 2003-2007 Timo Sirainen */
 
 #include "lib.h"
 #include "ioloop.h"
@@ -6,14 +6,9 @@
 #include "file-dotlock.h"
 #include "nfs-workarounds.h"
 #include "close-keep-errno.h"
-#include "read-full.h"
-#include "write-full.h"
 #include "mmap-util.h"
 #include "mail-index-private.h"
-#include "mail-index-view-private.h"
 #include "mail-transaction-log-private.h"
-#include "mail-transaction-util.h"
-#include "mail-index-transaction-private.h"
 
 #include <stddef.h>
 #include <stdio.h>
@@ -26,49 +21,20 @@
 #define MAIL_TRANSACTION_LOG_SUFFIX ".log"
 #define LOG_NEW_DOTLOCK_SUFFIX ".newlock"
 
-#define INDEX_HAS_MISSING_LOGS(index, file) \
-	!(((file)->hdr.file_seq == (index)->hdr->log_file_seq && \
-	   (index)->hdr->log_file_int_offset >= (file)->hdr.hdr_size) || \
-	  ((file)->hdr.prev_file_seq == (index)->hdr->log_file_seq && \
-	   (file)->hdr.prev_file_offset == (index)->hdr->log_file_int_offset))
-
-static int mail_transaction_log_check_file_seq(struct mail_transaction_log *log)
+static void
+mail_transaction_log_set_head(struct mail_transaction_log *log,
+			      struct mail_transaction_log_file *file)
 {
-	struct mail_index *index = log->index;
-	struct mail_transaction_log_file *file;
-	unsigned int lock_id;
-	int ret;
+	i_assert(log->head != file);
 
-	if (mail_transaction_log_lock_head(log) < 0)
-		return -1;
-
-	file = log->head;
 	file->refcount++;
-
-	ret = mail_index_lock_shared(index, TRUE, &lock_id);
-	if (ret == 0) {
-		ret = mail_index_map(index, FALSE);
-		if (ret <= 0)
-			ret = -1;
-		else if (INDEX_HAS_MISSING_LOGS(index, file)) {
-			/* broken - fix it by creating a new log file */
-			ret = mail_transaction_log_rotate(log, FALSE);
-		}
-	}
-
-	if (--file->refcount == 0)
-		mail_transaction_logs_clean(log);
-	else
-		mail_transaction_log_file_unlock(file);
-	return ret;
+	log->head = file;
 }
 
-static struct mail_transaction_log *
-mail_transaction_log_open_int(struct mail_index *index, bool create)
+struct mail_transaction_log *
+mail_transaction_log_alloc(struct mail_index *index)
 {
 	struct mail_transaction_log *log;
-	struct mail_transaction_log_file *file;
-	const char *path;
 
 	log = i_new(struct mail_transaction_log, 1);
 	log->index = index;
@@ -80,78 +46,87 @@
 	log->new_dotlock_settings = log->dotlock_settings;
 	log->new_dotlock_settings.lock_suffix = LOG_NEW_DOTLOCK_SUFFIX;
 
-	path = t_strconcat(index->filepath,
-			   MAIL_TRANSACTION_LOG_SUFFIX, NULL);
-	if (MAIL_INDEX_IS_IN_MEMORY(index))
-		file = mail_transaction_log_file_alloc_in_memory(log);
-	else if (create) {
-		struct stat st;
-
-		file = mail_transaction_log_file_alloc(log, path);
-		if (stat(path, &st) < 0)
-			memset(&st, 0, sizeof(st));
-		if (mail_transaction_log_file_create(file, FALSE,
-						     st.st_dev, st.st_ino,
-						     st.st_size) < 0 ||
-		    mail_transaction_log_file_fd_open_or_create(file,
-								FALSE) < 0) {
-			mail_transaction_log_file_free(file);
-			file = NULL;
-		}
-	} else {
-		file = mail_transaction_log_file_open_or_create(log, path);
-	}
-
-	if (file == NULL) {
-		/* fallback to in-memory indexes */
-		if (mail_index_move_to_memory(index) < 0) {
-			mail_transaction_log_close(&log);
-			return NULL;
-		}
-		file = mail_transaction_log_file_open_or_create(log, path);
-		i_assert(file != NULL);
-	}
-	file->refcount++;
-	log->head = file;
-	i_assert(log->files != NULL);
-
-	if (index->fd != -1 &&
-	    INDEX_HAS_MISSING_LOGS(index, log->head)) {
-		/* head log file isn't same as head index file -
-		   shouldn't happen except in race conditions.
-		   lock them and check again */
-		if (mail_transaction_log_check_file_seq(log) < 0) {
-			mail_transaction_log_close(&log);
-			return NULL;
-		}
-	}
 	return log;
 }
 
-struct mail_transaction_log *
-mail_transaction_log_open_or_create(struct mail_index *index)
+int mail_transaction_log_open(struct mail_transaction_log *log)
 {
-	return mail_transaction_log_open_int(index, FALSE);
+	struct mail_transaction_log_file *file;
+	const char *path;
+	int ret;
+
+	if (log->open_file != NULL)
+		mail_transaction_log_file_free(&log->open_file);
+
+	if (MAIL_INDEX_IS_IN_MEMORY(log->index))
+		return 0;
+
+	path = t_strconcat(log->index->filepath,
+			   MAIL_TRANSACTION_LOG_SUFFIX, NULL);
+
+	file = mail_transaction_log_file_alloc(log, path);
+	if ((ret = mail_transaction_log_file_open(file, FALSE)) <= 0) {
+		/* leave the file for _create() */
+		log->open_file = file;
+		return ret;
+	}
+
+	mail_transaction_log_set_head(log, file);
+	return 1;
 }
 
-struct mail_transaction_log *
-mail_transaction_log_create(struct mail_index *index)
+int mail_transaction_log_create(struct mail_transaction_log *log)
 {
-	return mail_transaction_log_open_int(index, TRUE);
+	struct mail_transaction_log_file *file;
+	const char *path;
+
+	if (MAIL_INDEX_IS_IN_MEMORY(log->index)) {
+		file = mail_transaction_log_file_alloc_in_memory(log);
+		mail_transaction_log_set_head(log, file);
+		return 0;
+	}
+
+	path = t_strconcat(log->index->filepath,
+			   MAIL_TRANSACTION_LOG_SUFFIX, NULL);
+
+	file = mail_transaction_log_file_alloc(log, path);
+	if (log->open_file != NULL) {
+		/* remember what file we tried to open. if someone else created
+		   a new file, use it instead of recreating it */
+		file->st_ino = log->open_file->st_ino;
+		file->st_dev = log->open_file->st_dev;
+		file->last_size = log->open_file->last_size;
+		file->last_mtime = log->open_file->last_mtime;
+		mail_transaction_log_file_free(&log->open_file);
+	}
+
+	if (mail_transaction_log_file_create(file) < 0) {
+		mail_transaction_log_file_free(&file);
+		return -1; /* file was just freed, it must not become head */
+	}
+	mail_transaction_log_set_head(log, file);
+	return 1;
 }
 
-void mail_transaction_log_close(struct mail_transaction_log **_log)
+void mail_transaction_log_close(struct mail_transaction_log *log)
 {
-	struct mail_transaction_log *log = *_log;
-
 	mail_transaction_log_views_close(log);
 
+	if (log->open_file != NULL)
+		mail_transaction_log_file_free(&log->open_file);
 	if (log->head != NULL)
 		log->head->refcount--;
 	mail_transaction_logs_clean(log);
 	i_assert(log->files == NULL);
+}
+
+void mail_transaction_log_free(struct mail_transaction_log **_log)
+{
+	struct mail_transaction_log *log = *_log;
 
 	*_log = NULL;
+
+	mail_transaction_log_close(log);
 	log->index->log = NULL;
 	i_free(log);
 }
@@ -203,64 +178,62 @@
 
 		i_assert(file->refcount >= 0);
 		if (file->refcount == 0)
-			mail_transaction_log_file_free(file);
+			mail_transaction_log_file_free(&file);
 	}
 }
 
-int mail_transaction_log_rotate(struct mail_transaction_log *log, bool lock)
+#define LOG_WANT_ROTATE(file) \
+	(((file)->sync_offset > MAIL_TRANSACTION_LOG_ROTATE_MIN_SIZE && \
+	  (time_t)(file)->hdr.create_stamp < \
+	   ioloop_time - MAIL_TRANSACTION_LOG_ROTATE_TIME) || \
+	 ((file)->sync_offset > MAIL_TRANSACTION_LOG_ROTATE_MAX_SIZE))
+
+bool mail_transaction_log_want_rotate(struct mail_transaction_log *log)
+{
+	return LOG_WANT_ROTATE(log->head);
+}
+
+int mail_transaction_log_rotate(struct mail_transaction_log *log)
 {
 	struct mail_transaction_log_file *file;
 	const char *path = log->head->filepath;
-        struct stat st;
-	int ret;
+	struct stat st;
 
 	i_assert(log->head->locked);
 
-	if (MAIL_INDEX_IS_IN_MEMORY(log->index)) {
+	if (MAIL_INDEX_IS_IN_MEMORY(log->index))
 		file = mail_transaction_log_file_alloc_in_memory(log);
-		if (lock)
-			file->locked = TRUE;
-	} else {
+	else {
                 /* we're locked, we shouldn't need to worry about ESTALE
                    problems in here. */
 		if (fstat(log->head->fd, &st) < 0) {
 			mail_index_file_set_syscall_error(log->index, path,
 							  "fstat()");
 			return -1;
 		}
 
 		file = mail_transaction_log_file_alloc(log, path);
-		if (mail_transaction_log_file_create(file, lock, st.st_dev,
-						     st.st_ino,
-						     st.st_size) < 0) {
-			mail_transaction_log_file_free(file);
-			return -1;
-		}
 
-                ret = mail_transaction_log_file_fd_open_or_create(file, FALSE);
-		if (ret <= 0) {
-			i_assert(ret != 0);
-			mail_transaction_log_file_free(file);
+		file->st_dev = st.st_dev;
+		file->st_ino = st.st_ino;
+		file->last_mtime = st.st_mtime;
+		file->last_size = st.st_size;
+
+		if (mail_transaction_log_file_create(file) < 0) {
+			mail_transaction_log_file_free(&file);
 			return -1;
 		}
 	}
 
-	i_assert(file->locked == lock);
-
 	if (--log->head->refcount == 0)
 		mail_transaction_logs_clean(log);
 	else
 		mail_transaction_log_file_unlock(log->head);
-
-	i_assert(log->head != file);
-	i_assert(log->files != NULL);
-	log->head = file;
-	log->head->refcount++;
+	mail_transaction_log_set_head(log, file);
 	return 0;
 }
 
-static int mail_transaction_log_refresh(struct mail_transaction_log *log,
-					bool create_if_needed)
+static int mail_transaction_log_refresh(struct mail_transaction_log *log)
 {
         struct mail_transaction_log_file *file;
 	struct stat st;
@@ -279,7 +252,7 @@
 							  "stat()");
 			return -1;
 		}
-		/* log was deleted. just reopen/recreate it. */
+		return -1;
 	} else {
 		if (log->head->st_ino == st.st_ino &&
 		    CMP_DEV_T(log->head->st_dev, st.st_dev)) {
@@ -288,38 +261,59 @@
 		}
 	}
 
-	file = create_if_needed ?
-		mail_transaction_log_file_open_or_create(log, path) :
-		mail_transaction_log_file_open(log, path);
-	if (file == NULL)
+	file = mail_transaction_log_file_alloc(log, path);
+	if (mail_transaction_log_file_open(file, FALSE) <= 0) {
+		mail_transaction_log_file_free(&file);
 		return -1;
+	}
 
 	i_assert(!file->locked);
 
 	if (--log->head->refcount == 0)
 		mail_transaction_logs_clean(log);
-
-	i_assert(log->files != NULL);
-	log->head = file;
-	log->head->refcount++;
+	mail_transaction_log_set_head(log, file);
 	return 0;
 }
 
-int mail_transaction_log_file_find(struct mail_transaction_log *log,
+void mail_transaction_log_get_mailbox_sync_pos(struct mail_transaction_log *log,
+					       uint32_t *file_seq_r,
+					       uoff_t *file_offset_r)
+{
+	*file_seq_r = log->head->hdr.file_seq;
+	*file_offset_r = log->head->mailbox_sync_max_offset;
+}
+
+void mail_transaction_log_set_mailbox_sync_pos(struct mail_transaction_log *log,
+					       uint32_t file_seq,
+					       uoff_t file_offset)
+{
+	i_assert(file_seq == log->head->hdr.file_seq);
+	i_assert(file_offset >= log->head->mailbox_sync_saved_offset);
+
+	if (file_offset >= log->head->mailbox_sync_max_offset)
+		log->head->mailbox_sync_max_offset = file_offset;
+}
+
+int mail_transaction_log_find_file(struct mail_transaction_log *log,
 				   uint32_t file_seq,
 				   struct mail_transaction_log_file **file_r)
 {
 	struct mail_transaction_log_file *file;
-	struct stat st;
 	const char *path;
-	int ret, fd;
+	int ret;
 
 	if (file_seq > log->head->hdr.file_seq) {
-		/* don't try to recreate log file if it gets lost. we're
-		   already in trouble and with mmap_disable the creation
-		   could cause a recursive mail_index_map() call */
-		if (mail_transaction_log_refresh(log, FALSE) < 0)
+		/* see if the .log file has been recreated */
+		if (log->head->locked) {
+			/* transaction log is locked. there's no way a newer
+			   file exists. */
+			return 0;
+		}
+
+		if (mail_transaction_log_refresh(log) < 0)
 			return -1;
+		if (file_seq > log->head->hdr.file_seq)
+			return 0;
 	}
 
 	for (file = log->files; file != NULL; file = file->next) {
@@ -335,63 +329,13 @@
 	/* see if we have it in log.2 file */
 	path = t_strconcat(log->index->filepath,
 			   MAIL_TRANSACTION_LOG_SUFFIX".2", NULL);
-	fd = nfs_safe_open(path, O_RDWR);
-	if (fd == -1) {
-		if (errno == ENOENT)
-			return 0;
-
-		mail_index_file_set_syscall_error(log->index, path, "open()");
-		return -1;
-	}
-
-	if (fstat(fd, &st) < 0) {
-		close_keep_errno(fd);
-                if (errno == ESTALE) {
-                        /* treat as "doesn't exist" */
-                        return 0;
-                }
-                mail_index_file_set_syscall_error(log->index, path, "fstat()");
-		return -1;
-	}
-
-	/* see if we have it already opened */
-	for (file = log->files; file != NULL; file = file->next) {
-		if (file->st_ino == st.st_ino &&
-		    CMP_DEV_T(file->st_dev, st.st_dev)) {
-			if (close(fd) < 0)
-				i_error("close() failed: %m");
-			return 0;
-		}
-	}
-
-
 	file = mail_transaction_log_file_alloc(log, path);
-	file->fd = fd;
-
-	ret = mail_transaction_log_file_fd_open(file, FALSE, TRUE);
-	if (ret <= 0) {
-		bool stale = errno == ESTALE;
-
-		if (ret == 0) {
-			/* corrupted, delete it */
-			if (unlink(file->filepath) < 0 && errno != ENOENT) {
-				i_error("unlink(%s) failed: %m",
-					file->filepath);
-			}
-			mail_transaction_log_file_free(file);
-			return 0;
-                }
-		mail_transaction_log_file_free(file);
-
-		if (stale) {
-                        /* treat as "doesn't exist" */
-                        return 0;
-                }
-		return -1;
-	}
-
-	/* got it */
-	mail_transaction_log_file_add_to_list(file);
+	if ((ret = mail_transaction_log_file_open(file, TRUE)) <= 0) {
+		/* file wasn't added to the log's file list, so it has to be
+		   freed here or it leaks */
+		mail_transaction_log_file_free(&file);
+		return ret;
+	}
 
 	/* but is it what we expected? */
 	if (file->hdr.file_seq != file_seq)
@@ -420,7 +364,7 @@
 			return -1;
 
 		file->refcount++;
-		ret = mail_transaction_log_refresh(log, TRUE);
+		ret = mail_transaction_log_refresh(log);
 		if (--file->refcount == 0) {
 			mail_transaction_logs_clean(log);
 			file = NULL;
--- a/src/lib-index/mail-transaction-log.h	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/lib-index/mail-transaction-log.h	Mon Jun 11 14:50:10 2007 +0300
@@ -106,12 +106,31 @@
 };
 
 struct mail_transaction_log *
-mail_transaction_log_open_or_create(struct mail_index *index);
-struct mail_transaction_log *
-mail_transaction_log_create(struct mail_index *index);
-void mail_transaction_log_close(struct mail_transaction_log **log);
+mail_transaction_log_alloc(struct mail_index *index);
+void mail_transaction_log_free(struct mail_transaction_log **log);
+
+/* Open the transaction log. Returns 1 if ok, 0 if the file doesn't exist or
+   is corrupted, -1 if there was some I/O error. */
+int mail_transaction_log_open(struct mail_transaction_log *log);
+/* Create, or recreate, the transaction log. Returns >= 0 if ok, -1 if error. */
+int mail_transaction_log_create(struct mail_transaction_log *log);
+/* Close all the open transaction log files. */
+void mail_transaction_log_close(struct mail_transaction_log *log);
 
-int mail_transaction_log_move_to_memory(struct mail_transaction_log *log);
+/* Returns the file seq/offset where the mailbox is currently synced at.
+   Since the log is rotated only when mailbox is fully synced, the sequence
+   always points to the latest file. This function doesn't actually find the
+   latest sync position, so you'll need to use e.g. log_view_set() before
+   calling this. */
+void mail_transaction_log_get_mailbox_sync_pos(struct mail_transaction_log *log,
+					       uint32_t *file_seq_r,
+					       uoff_t *file_offset_r);
+/* Set the current mailbox sync position. file_seq must always be the latest
+   log file's sequence. The offset is written automatically to the log when
+   other transactions are being written. */
+void mail_transaction_log_set_mailbox_sync_pos(struct mail_transaction_log *log,
+					       uint32_t file_seq,
+					       uoff_t file_offset);
 
 struct mail_transaction_log_view *
 mail_transaction_log_view_open(struct mail_transaction_log *log);
@@ -172,4 +191,8 @@
 bool mail_transaction_log_is_head_prev(struct mail_transaction_log *log,
 				       uint32_t file_seq, uoff_t file_offset);
 
+/* Move currently opened log files to memory (called by
+   mail_index_move_to_memory()) */
+int mail_transaction_log_move_to_memory(struct mail_transaction_log *log);
+
 #endif
--- a/src/util/idxview.c	Mon Jun 11 06:28:07 2007 +0300
+++ b/src/util/idxview.c	Mon Jun 11 14:50:10 2007 +0300
@@ -69,8 +69,9 @@
 	printf("first unseen uid lowwater = %u\n", hdr.first_unseen_uid_lowwater);
 	printf("first deleted uid lowwater = %u\n", hdr.first_deleted_uid_lowwater);
 	printf("log file seq = %u\n", hdr.log_file_seq);
-	printf("log file int offset = %u\n", hdr.log_file_int_offset);
-	printf("log file ext offset = %u\n", hdr.log_file_ext_offset);
+	printf("log file index int offset = %u\n", hdr.log_file_index_int_offset);
+	printf("log file index ext offset = %u\n", hdr.log_file_index_ext_offset);
+	printf("log file mailbox offset = %u\n", hdr.log_file_mailbox_offset);
 	printf("sync size = %llu\n", (unsigned long long)hdr.sync_size);
 	printf("sync stamp = %u\n", hdr.sync_stamp);
 	printf("day stamp = %u\n", hdr.day_stamp);