changeset 7812:d10cb44ab446 HEAD

Modseqs are no longer calculated from the transaction log sequence + offset. They now begin from 1, and each "visible" transaction increases the modseq by one.
author Timo Sirainen <tss@iki.fi>
date Wed, 11 Jun 2008 14:35:15 +0300
parents 85191a8cd7df
children f8a0e11566c8
files src/lib-index/mail-index-modseq.c src/lib-index/mail-index-modseq.h src/lib-index/mail-index-private.h src/lib-index/mail-index-sync-update.c src/lib-index/mail-index-view-sync.c src/lib-index/mail-transaction-log-file.c src/lib-index/mail-transaction-log-private.h src/lib-index/mail-transaction-log-view.c src/lib-index/mail-transaction-log.h src/lib-storage/index/index-fetch.c src/util/logview.c
diffstat 11 files changed, 491 insertions(+), 121 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-index/mail-index-modseq.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-index-modseq.c	Wed Jun 11 14:35:15 2008 +0300
@@ -2,9 +2,10 @@
 
 #include "lib.h"
 #include "array.h"
+#include "mail-transaction-log-private.h"
 #include "mail-index-private.h"
+#include "mail-index-sync-private.h"
 #include "mail-index-modseq.h"
-#include "mail-index-sync-private.h"
 
 #define MAIL_INDEX_MODSEQ_EXT_NAME "modseq"
 
@@ -37,8 +38,6 @@
 	struct mail_index_map_modseq *mmap;
 
 	uint64_t highest_modseq;
-	uint32_t log_seq;
-	uoff_t log_offset;
 };
 
 void mail_index_modseq_init(struct mail_index *index)
@@ -49,10 +48,10 @@
 					sizeof(uint64_t), sizeof(uint64_t));
 }
 
-static uint64_t mail_index_modseq_get_head(struct mail_index_map *map)
+static uint64_t mail_index_modseq_get_head(struct mail_index *index)
 {
-	return map->hdr.log_file_head_offset |
-		((uint64_t)(map->hdr.indexid + map->hdr.log_file_seq) << 32);
+	return index->log->head == NULL ? 1 :
+		index->log->head->sync_highest_modseq;
 }
 
 void mail_index_modseq_enable(struct mail_index *index)
@@ -73,7 +72,7 @@
 		trans = mail_index_transaction_begin(view, 0);
 
 		memset(&hdr, 0, sizeof(hdr));
-		hdr.highest_modseq = mail_index_modseq_get_head(index->map);
+		hdr.highest_modseq = mail_index_modseq_get_head(index);
 		mail_index_update_header_ext(trans, index->modseq_ext_id,
 					     0, &hdr, sizeof(hdr));
 
@@ -93,21 +92,38 @@
 	index->modseqs_enabled = TRUE;
 }
 
-uint64_t mail_index_modseq_get_highest(struct mail_index_view *view)
+const struct mail_index_modseq_header *
+mail_index_map_get_modseq_header(struct mail_index_map *map)
+{
+	const struct mail_index_ext *ext;
+	uint32_t idx;
+
+	if (!mail_index_map_get_ext_idx(map, map->index->modseq_ext_id, &idx))
+		return NULL;
+
+	ext = array_idx(&map->extensions, idx);
+	if (ext->hdr_size != sizeof(struct mail_index_modseq_header))
+		return NULL;
+
+	return CONST_PTR_OFFSET(map->hdr_base, ext->hdr_offset);
+}
+
+uint64_t mail_index_map_modseq_get_highest(struct mail_index_map *map)
 {
 	const struct mail_index_modseq_header *modseq_hdr;
-	const void *data;
-	size_t size;
 
-	mail_index_get_header_ext(view, view->index->modseq_ext_id,
-				  &data, &size);
-	if (size == sizeof(*modseq_hdr)) {
-		modseq_hdr = data;
-		if (modseq_hdr->highest_modseq != 0)
-			return modseq_hdr->highest_modseq;
+	modseq_hdr = mail_index_map_get_modseq_header(map);
+	if (modseq_hdr != NULL && modseq_hdr->highest_modseq != 0)
+		return modseq_hdr->highest_modseq;
+	else {
+		/* fallback to returning the log head */
+		return mail_index_modseq_get_head(map->index);
 	}
-	/* fallback to returning the log head */
-	return mail_index_modseq_get_head(view->map);
+}
+
+uint64_t mail_index_modseq_get_highest(struct mail_index_view *view)
+{
+	return mail_index_map_modseq_get_highest(view->map);
 }
 
 static struct mail_index_map_modseq *
@@ -142,13 +158,13 @@
 	uint32_t ext_map_idx;
 
 	if (mmap == NULL)
-		return mail_index_modseq_get_head(view->map);
+		return mail_index_modseq_get_head(view->index);
 
 	rec = mail_index_lookup_full(view, seq, &map);
 	if (!mail_index_map_get_ext_idx(map, view->index->modseq_ext_id,
 					&ext_map_idx)) {
 		/* not enabled yet */
-		return mail_index_modseq_get_head(view->map);
+		return mail_index_modseq_get_head(view->index);
 	}
 
 	ext = array_idx(&map->extensions, ext_map_idx);
@@ -238,16 +254,6 @@
 	return highest_modseq;
 }
 
-static uint64_t get_cur_modseq(struct mail_index_modseq_sync *ctx)
-{
-	mail_transaction_log_view_get_prev_pos(ctx->log_view,
-					       &ctx->log_seq, &ctx->log_offset);
-	i_assert(ctx->log_offset <= (uint32_t)-1);
-
-	return ctx->log_offset |
-		((uint64_t)(ctx->view->map->hdr.indexid + ctx->log_seq) << 32);
-}
-
 static void
 mail_index_modseq_update(struct mail_index_modseq_sync *ctx,
 			 uint64_t modseq, bool nonzeros,
@@ -279,10 +285,13 @@
 mail_index_modseq_update_highest(struct mail_index_modseq_sync *ctx,
 				 uint32_t seq1, uint32_t seq2)
 {
+	uint64_t modseq;
+
 	if (ctx->mmap == NULL)
 		return FALSE;
 
-	mail_index_modseq_update(ctx, get_cur_modseq(ctx), TRUE, seq1, seq2);
+	modseq = mail_transaction_log_view_get_prev_modseq(ctx->log_view);
+	mail_index_modseq_update(ctx, modseq, TRUE, seq1, seq2);
 	return TRUE;
 }
 
@@ -401,7 +410,8 @@
 						    &reset);
 		/* since we don't know if we skipped some changes, set all
 		   modseqs to beginning of the latest file. */
-		cur_modseq = get_cur_modseq(ctx);
+		cur_modseq = mail_transaction_log_view_get_prev_modseq(
+								ctx->log_view);
 		if (cur_modseq < hdr->highest_modseq) {
 			/* should happen only when setting initial modseqs.
 			   we may already have returned highest_modseq as
@@ -452,21 +462,25 @@
 	const struct mail_index_ext *ext;
 	const struct mail_index_modseq_header *old_modseq_hdr;
 	struct mail_index_modseq_header new_modseq_hdr;
-	uint32_t ext_map_idx;
+	uint32_t ext_map_idx, log_seq;
+	uoff_t log_offset;
 
 	if (!mail_index_map_get_ext_idx(map, ctx->view->index->modseq_ext_id,
 					&ext_map_idx))
 		return;
 
+	mail_transaction_log_view_get_prev_pos(ctx->view->log_view,
+					       &log_seq, &log_offset);
+
 	ext = array_idx(&map->extensions, ext_map_idx);
 	old_modseq_hdr = CONST_PTR_OFFSET(map->hdr_base, ext->hdr_offset);
 
-	if (old_modseq_hdr->log_seq < ctx->log_seq ||
-	    (old_modseq_hdr->log_seq == ctx->log_seq &&
-	     old_modseq_hdr->log_offset < ctx->log_offset)) {
+	if (old_modseq_hdr->log_seq < log_seq ||
+	    (old_modseq_hdr->log_seq == log_seq &&
+	     old_modseq_hdr->log_offset < log_offset)) {
 		new_modseq_hdr.highest_modseq = ctx->highest_modseq;
-		new_modseq_hdr.log_seq = ctx->log_seq;
-		new_modseq_hdr.log_offset = ctx->log_offset;
+		new_modseq_hdr.log_seq = log_seq;
+		new_modseq_hdr.log_offset = log_offset;
 
 		buffer_write(map->hdr_copy_buf, ext->hdr_offset,
 			     &new_modseq_hdr, sizeof(new_modseq_hdr));
@@ -517,7 +531,7 @@
 			array_delete(&metadata->modseqs, seq1, seq2-seq1);
 	}
 
-	modseq = get_cur_modseq(ctx);
+	modseq = mail_transaction_log_view_get_prev_modseq(ctx->log_view);
 	if (ctx->highest_modseq < modseq)
 		ctx->highest_modseq = modseq;
 }
@@ -601,15 +615,24 @@
 	i_free(mmap);
 }
 
-bool mail_index_modseq_get_log_offset(struct mail_index_view *view,
-				      uint64_t modseq, uint32_t *log_seq_r,
-				      uoff_t *log_offset_r)
+bool mail_index_modseq_get_next_log_offset(struct mail_index_view *view,
+					   uint64_t modseq, uint32_t *log_seq_r,
+					   uoff_t *log_offset_r)
 {
-	if (view->map->hdr.indexid >= (modseq >> 32)) {
-		/* invalid modseq or created for an earlier index */
+	struct mail_transaction_log_file *file, *prev_file = NULL;
+
+	for (file = view->index->log->files; file != NULL; file = file->next) {
+		if (modseq < file->hdr.initial_modseq)
+			break;
+		prev_file = file;
+	}
+
+	if (prev_file == NULL) {
+		/* the log file has been deleted already */
 		return FALSE;
 	}
-	*log_seq_r = (modseq >> 32) - view->map->hdr.indexid;
-	*log_offset_r = modseq & 0xffffffff;
-	return TRUE;
+
+	*log_seq_r = prev_file->hdr.file_seq;
+	return mail_transaction_log_file_get_modseq_next_offset(
+					prev_file, modseq, log_offset_r) == 0;
 }
--- a/src/lib-index/mail-index-modseq.h	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-index-modseq.h	Wed Jun 11 14:35:15 2008 +0300
@@ -4,6 +4,7 @@
 enum mail_flags;
 struct mail_keywords;
 struct mail_index;
+struct mail_index_map;
 struct mail_index_view;
 struct mail_index_modseq;
 struct mail_index_map_modseq;
@@ -19,6 +20,9 @@
 
 void mail_index_modseq_init(struct mail_index *index);
 
+const struct mail_index_modseq_header *
+mail_index_map_get_modseq_header(struct mail_index_map *map);
+uint64_t mail_index_map_modseq_get_highest(struct mail_index_map *map);
 void mail_index_modseq_enable(struct mail_index *index);
 uint64_t mail_index_modseq_get_highest(struct mail_index_view *view);
 
@@ -49,8 +53,8 @@
 
 void mail_index_map_modseq_free(struct mail_index_map_modseq *mmap);
 
-bool mail_index_modseq_get_log_offset(struct mail_index_view *view,
-				      uint64_t modseq, uint32_t *log_seq_r,
-				      uoff_t *log_offset_r);
+bool mail_index_modseq_get_next_log_offset(struct mail_index_view *view,
+					   uint64_t modseq, uint32_t *log_seq_r,
+					   uoff_t *log_offset_r);
 
 #endif
--- a/src/lib-index/mail-index-private.h	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-index-private.h	Wed Jun 11 14:35:15 2008 +0300
@@ -32,6 +32,10 @@
 	((struct mail_index_record *) \
 	 PTR_OFFSET((map)->rec_map->records, (idx) * (map)->hdr.record_size))
 
+#define MAIL_TRANSACTION_FLAG_UPDATE_IS_INTERNAL(u) \
+	((((u)->add_flags | (u)->remove_flags) & \
+	  MAIL_INDEX_FLAGS_MASK) == 0)
+
 typedef int mail_index_expunge_handler_t(struct mail_index_sync_map_ctx *ctx,
 					 uint32_t seq, const void *data,
 					 void **sync_context, void *context);
--- a/src/lib-index/mail-index-sync-update.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-index-sync-update.c	Wed Jun 11 14:35:15 2008 +0300
@@ -355,9 +355,11 @@
 		return 1;
 
 	mail_index_sync_write_seq_update(ctx, seq1, seq2);
-	mail_index_modseq_update_flags(ctx->modseq_ctx,
-				       u->add_flags | u->remove_flags,
-				       seq1, seq2);
+	if (!MAIL_TRANSACTION_FLAG_UPDATE_IS_INTERNAL(u)) {
+		mail_index_modseq_update_flags(ctx->modseq_ctx,
+					       u->add_flags | u->remove_flags,
+					       seq1, seq2);
+	}
 
 	if ((u->add_flags & MAIL_INDEX_MAIL_FLAG_DIRTY) != 0)
 		view->map->hdr.flags |= MAIL_INDEX_HDR_FLAG_HAVE_DIRTY;
--- a/src/lib-index/mail-index-view-sync.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-index-view-sync.c	Wed Jun 11 14:35:15 2008 +0300
@@ -236,13 +236,10 @@
 {
 	const struct mail_transaction_header *hdr;
 	const void *data;
-	uint32_t seq;
-	uoff_t offset;
 	bool have_expunges = FALSE;
 	int ret;
 
-	mail_transaction_log_view_get_prev_pos(view->log_view,
-					       &seq, &offset);
+	mail_transaction_log_view_mark(view->log_view);
 
 	while ((ret = mail_transaction_log_view_next(view->log_view,
 						     &hdr, &data)) > 0) {
@@ -260,7 +257,7 @@
 		}
 	}
 
-	mail_transaction_log_view_seek(view->log_view, seq, offset);
+	mail_transaction_log_view_rewind(view->log_view);
 
 	/* handle failures as having expunges (which is safer).
 	   we'll probably fail later. */
@@ -490,10 +487,6 @@
 	return 1;
 }
 
-#define FLAG_UPDATE_IS_INTERNAL(u) \
-	((((u)->add_flags | (u)->remove_flags) & \
-	  MAIL_INDEX_FLAGS_MASK) == 0)
-
 static bool
 mail_index_view_sync_get_rec(struct mail_index_view_sync_ctx *ctx,
 			     struct mail_index_view_sync_rec *rec)
@@ -534,7 +527,7 @@
 		/* data contains mail_transaction_flag_update[] */
 		for (;;) {
 			ctx->data_offset += sizeof(*update);
-			if (!FLAG_UPDATE_IS_INTERNAL(update))
+			if (!MAIL_TRANSACTION_FLAG_UPDATE_IS_INTERNAL(update))
 				break;
 
 			/* skip internal flag changes */
--- a/src/lib-index/mail-transaction-log-file.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-transaction-log-file.c	Wed Jun 11 14:35:15 2008 +0300
@@ -9,6 +9,7 @@
 #include "write-full.h"
 #include "mmap-util.h"
 #include "mail-index-private.h"
+#include "mail-index-modseq.h"
 #include "mail-transaction-log-private.h"
 
 #define LOG_PREFETCH 1024
@@ -106,23 +107,36 @@
 	struct mail_transaction_log *log = file->log;
 	struct mail_transaction_log_file **p;
 	struct mail_index_map *map = log->index->map;
+	const struct mail_index_modseq_header *modseq_hdr;
 
 	if (map != NULL && file->hdr.file_seq == map->hdr.log_file_seq &&
 	    map->hdr.log_file_head_offset != 0) {
 		/* we can get a valid log offset from index file. initialize
 		   sync_offset from it so we don't have to read the whole log
 		   file from beginning. */
-		if (map->hdr.log_file_head_offset >= file->hdr.hdr_size)
-			file->sync_offset = map->hdr.log_file_head_offset;
-		else {
+		uoff_t head_offset = map->hdr.log_file_head_offset;
+
+		modseq_hdr = mail_index_map_get_modseq_header(map);
+		if (head_offset < file->hdr.hdr_size) {
 			mail_index_set_error(log->index,
 				"%s: log_file_head_offset too small",
 				log->index->filepath);
 			file->sync_offset = file->hdr.hdr_size;
+			file->sync_highest_modseq = file->hdr.initial_modseq;
+		} else if (modseq_hdr == NULL ||
+			   modseq_hdr->log_seq != file->hdr.file_seq ||
+			   modseq_hdr->log_offset != head_offset) {
+			/* highest_modseq not synced, start from beginning */
+			file->sync_offset = file->hdr.hdr_size;
+			file->sync_highest_modseq = file->hdr.initial_modseq;
+		} else {
+			file->sync_offset = head_offset;
+			file->sync_highest_modseq = modseq_hdr->highest_modseq;
 		}
 		file->saved_tail_offset = map->hdr.log_file_tail_offset;
 	} else {
 		file->sync_offset = file->hdr.hdr_size;
+		file->sync_highest_modseq = file->hdr.initial_modseq;
 	}
 
 	/* insert it to correct position */
@@ -165,6 +179,8 @@
 		hdr->prev_file_seq = index->map->hdr.log_file_seq;
 		hdr->prev_file_offset = index->map->hdr.log_file_head_offset;
 		hdr->file_seq = index->map->hdr.log_file_seq + 1;
+		hdr->initial_modseq =
+			mail_index_map_modseq_get_highest(index->map);
 	} else {
 		hdr->file_seq = 1;
 	}
@@ -346,6 +362,8 @@
 		   shouldn't have filled */
 		memset(PTR_OFFSET(&file->hdr, file->hdr.hdr_size), 0,
 		       sizeof(file->hdr) - file->hdr.hdr_size);
+		if (file->hdr.minor_version == 0)
+			file->hdr.initial_modseq = 1;
 	}
 
 	if (file->hdr.indexid == 0) {
@@ -387,6 +405,7 @@
 		}
 	}
 
+	file->sync_highest_modseq = file->hdr.initial_modseq;
 	return 1;
 }
 
@@ -499,6 +518,7 @@
 	if (reset) {
 		file->hdr.prev_file_seq = 0;
 		file->hdr.prev_file_offset = 0;
+		file->hdr.initial_modseq = 1;
 	}
 
 	if (write_full(new_fd, &file->hdr, sizeof(file->hdr)) < 0) {
@@ -697,15 +717,265 @@
 	return 0;
 }
 
+bool
+mail_transaction_header_has_modseq(const struct mail_transaction_header *hdr)
+{
+	switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) {
+	case MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_EXPUNGE_PROT:
+		if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) {
+			/* ignore expunge requests */
+			break;
+		}
+	case MAIL_TRANSACTION_APPEND:
+	case MAIL_TRANSACTION_FLAG_UPDATE:
+	case MAIL_TRANSACTION_KEYWORD_UPDATE:
+	case MAIL_TRANSACTION_KEYWORD_RESET:
+		/* these changes increase modseq */
+		return TRUE;
+	}
+	return FALSE;
+}
+
+static struct modseq_cache *
+modseq_cache_hit(struct mail_transaction_log_file *file, unsigned int idx)
+{
+	struct modseq_cache cache;
+
+	if (idx > 0) {
+		/* @UNSAFE: move it to top */
+		cache = file->modseq_cache[idx];
+		memmove(file->modseq_cache + 1, file->modseq_cache,
+			sizeof(*file->modseq_cache) * idx);
+		file->modseq_cache[0] = cache;
+	}
+	return &file->modseq_cache[0];
+}
+
+static struct modseq_cache *
+modseq_cache_get_offset(struct mail_transaction_log_file *file, uoff_t offset)
+{
+	unsigned int i, best = -1U;
+
+	for (i = 0; i < N_ELEMENTS(file->modseq_cache); i++) {
+		if (offset < file->modseq_cache[i].offset)
+			continue;
+
+		if (file->modseq_cache[i].offset == 0)
+			return NULL;
+
+		if (offset == file->modseq_cache[i].offset) {
+			/* exact cache hit */
+			return modseq_cache_hit(file, i);
+		}
+
+		if (best == -1U ||
+		    file->modseq_cache[i].offset <
+		    file->modseq_cache[best].offset)
+			best = i;
+	}
+	if (best == -1U)
+		return NULL;
+	return &file->modseq_cache[best];
+}
+
+static struct modseq_cache *
+modseq_cache_get_modseq(struct mail_transaction_log_file *file, uint64_t modseq)
+{
+	unsigned int i, best = -1U;
+
+	for (i = 0; i < N_ELEMENTS(file->modseq_cache); i++) {
+		if (modseq < file->modseq_cache[i].highest_modseq)
+			continue;
+
+		if (file->modseq_cache[i].offset == 0)
+			return NULL;
+
+		if (modseq == file->modseq_cache[i].highest_modseq) {
+			/* exact cache hit */
+			return modseq_cache_hit(file, i);
+		}
+
+		if (best == -1U ||
+		    file->modseq_cache[i].highest_modseq <
+		    file->modseq_cache[best].highest_modseq)
+			best = i;
+	}
+	if (best == -1U)
+		return NULL;
+	return &file->modseq_cache[best];
+}
+
 static int
-log_file_track_mailbox_sync_offset(struct mail_transaction_log_file *file,
-				   const struct mail_transaction_header *hdr,
-				   unsigned int trans_size)
+log_get_synced_record(struct mail_transaction_log_file *file, uoff_t *offset,
+		      const struct mail_transaction_header **hdr_r)
+{
+	const struct mail_transaction_header *hdr;
+	uint32_t trans_size;
+
+	hdr = CONST_PTR_OFFSET(file->buffer->data,
+			       *offset - file->buffer_offset);
+
+	/* we've already synced this record at some point. it should
+	   be valid. */
+	trans_size = mail_index_offset_to_uint32(hdr->size);
+	if (trans_size < sizeof(*hdr) ||
+	    *offset - file->buffer_offset + trans_size > file->buffer->used) {
+		mail_transaction_log_file_set_corrupted(file,
+			"Transaction log corrupted unexpectedly");
+		return -1;
+	}
+	*offset += trans_size;
+	*hdr_r = hdr;
+	return 0;
+}
+
+int mail_transaction_log_file_get_highest_modseq_at(
+		struct mail_transaction_log_file *file,
+		uoff_t offset, uint64_t *highest_modseq_r)
+{
+	const struct mail_transaction_header *hdr;
+	struct modseq_cache *cache;
+	uoff_t cur_offset;
+	uint64_t cur_modseq;
+	int ret;
+
+	i_assert(offset <= file->sync_offset);
+
+	if (offset == file->sync_offset) {
+		*highest_modseq_r = file->sync_highest_modseq;
+		return 0;
+	}
+
+	cache = modseq_cache_get_offset(file, offset);
+	if (cache == NULL) {
+		/* nothing usable in cache - scan from beginning */
+		cur_offset = file->hdr.hdr_size;
+		cur_modseq = file->hdr.initial_modseq;
+	} else if (cache->offset == offset) {
+		/* exact cache hit */
+		*highest_modseq_r = cache->highest_modseq;
+		return 0;
+	} else {
+		/* use cache to skip over some records */
+		cur_offset = cache->offset;
+		cur_modseq = cache->highest_modseq;
+	}
+
+	ret = mail_transaction_log_file_map(file, cur_offset, offset);
+	if (ret <= 0) {
+		if (ret < 0)
+			return -1;
+		mail_index_set_error(file->log->index,
+			"%s: Transaction log corrupted, can't get modseq",
+			file->filepath);
+		return -1;
+	}
+
+	i_assert(cur_offset >= file->buffer_offset);
+	i_assert(cur_offset + file->buffer->used >= offset);
+	while (cur_offset < offset) {
+		if (log_get_synced_record(file, &cur_offset, &hdr) < 0)
+			return- 1;
+		if (mail_transaction_header_has_modseq(hdr))
+			cur_modseq++;
+	}
+
+	/* @UNSAFE: cache the value */
+	memmove(file->modseq_cache + 1, file->modseq_cache,
+		sizeof(*file->modseq_cache) *
+		(N_ELEMENTS(file->modseq_cache) - 1));
+	file->modseq_cache[0].offset = cur_offset;
+	file->modseq_cache[0].highest_modseq = cur_modseq;
+
+	*highest_modseq_r = cur_modseq;
+	return 0;
+}
+
+int mail_transaction_log_file_get_modseq_next_offset(
+		struct mail_transaction_log_file *file,
+		uint64_t modseq, uoff_t *next_offset_r)
+{
+	const struct mail_transaction_header *hdr;
+	struct modseq_cache *cache;
+	uoff_t cur_offset, prev_offset;
+	uint64_t cur_modseq;
+	int ret;
+
+	if (modseq >= file->sync_highest_modseq) {
+		*next_offset_r = file->sync_offset;
+		return 0;
+	}
+
+	cache = modseq_cache_get_modseq(file, modseq);
+	if (cache == NULL) {
+		/* nothing usable in cache - scan from beginning */
+		cur_offset = file->hdr.hdr_size;
+		cur_modseq = file->hdr.initial_modseq;
+	} else if (cache->highest_modseq == modseq) {
+		/* exact cache hit */
+		*next_offset_r = cache->offset;
+		return 0;
+	} else {
+		/* use cache to skip over some records */
+		cur_offset = cache->offset;
+		cur_modseq = cache->highest_modseq;
+	}
+
+	ret = mail_transaction_log_file_map(file, cur_offset,
+					    file->sync_offset);
+	if (ret <= 0) {
+		if (ret < 0)
+			return -1;
+		mail_index_set_error(file->log->index,
+			"%s: Transaction log corrupted, can't get modseq",
+			file->filepath);
+		return -1;
+	}
+
+	i_assert(cur_offset >= file->buffer_offset);
+	while (cur_offset < file->sync_offset) {
+		prev_offset = cur_offset;
+		if (log_get_synced_record(file, &cur_offset, &hdr) < 0)
+			return -1;
+		if (mail_transaction_header_has_modseq(hdr)) {
+			if (++cur_modseq == modseq)
+				break;
+		}
+	}
+	if (modseq != cur_modseq) {
+		/* if we got to sync_offset, cur_modseq should be
+		   sync_highest_modseq */
+		mail_index_set_error(file->log->index,
+			"%s: Transaction log changed unexpectedly, "
+			"can't get modseq", file->filepath);
+		return -1;
+	}
+
+	/* @UNSAFE: cache the value */
+	memmove(file->modseq_cache + 1, file->modseq_cache,
+		sizeof(*file->modseq_cache) *
+		(N_ELEMENTS(file->modseq_cache) - 1));
+	file->modseq_cache[0].offset = cur_offset;
+	file->modseq_cache[0].highest_modseq = cur_modseq;
+
+	*next_offset_r = cur_offset;
+	return 0;
+}
+
+static int
+log_file_track_sync(struct mail_transaction_log_file *file,
+		    const struct mail_transaction_header *hdr,
+		    unsigned int trans_size)
 {
 	int ret;
 
-	i_assert((hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0);
+	if (mail_transaction_header_has_modseq(hdr))
+		file->sync_highest_modseq++;
 
+	if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0)
+		return 0;
+
+	/* external transactions: */
 	if ((hdr->type & MAIL_TRANSACTION_TYPE_MASK) ==
 	    MAIL_TRANSACTION_HEADER_UPDATE) {
 		/* see if this updates mailbox_sync_offset */
@@ -734,11 +1004,9 @@
 	size_t size, avail;
 	uint32_t trans_size = 0;
 
-	data = buffer_get_data(file->buffer, &size);
+	i_assert(file->sync_offset >= file->buffer_offset);
 
-	if (file->sync_offset < file->buffer_offset)
-		file->sync_offset = file->buffer_offset;
-
+	data = buffer_get_data(file->buffer, &size);
 	while (file->sync_offset - file->buffer_offset + sizeof(*hdr) <= size) {
 		hdr = CONST_PTR_OFFSET(data, file->sync_offset -
 				       file->buffer_offset);
@@ -757,11 +1025,9 @@
 			break;
 
 		/* transaction has been fully written */
-		if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) != 0) {
-			if (log_file_track_mailbox_sync_offset(file, hdr,
-							       trans_size) < 0)
-				return -1;
-		}
+		if (log_file_track_sync(file, hdr, trans_size) < 0)
+			return -1;
+
 		file->sync_offset += trans_size;
 		trans_size = 0;
 	}
@@ -937,6 +1203,13 @@
 		}
 	}
 
+	if (start_offset > file->sync_offset) {
+		/* although we could just skip over the unwanted data, we have
+		   to sync everything so that modseqs are calculated
+		   correctly */
+		start_offset = file->sync_offset;
+	}
+
 	if (file->buffer != NULL && file->buffer_offset > start_offset) {
 		/* we have to insert missing data to beginning of buffer */
 		ret = mail_transaction_log_file_insert_read(file, start_offset);
--- a/src/lib-index/mail-transaction-log-private.h	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-transaction-log-private.h	Wed Jun 11 14:35:15 2008 +0300
@@ -20,6 +20,13 @@
 
 #define MAIL_TRANSACTION_LOG_FILE_IN_MEMORY(file) ((file)->fd == -1)
 
+#define LOG_FILE_MODSEQ_CACHE_SIZE 10
+
+struct modseq_cache {
+	uoff_t offset;
+	uint64_t highest_modseq;
+};
+
 struct mail_transaction_log_file {
 	struct mail_transaction_log *log;
         struct mail_transaction_log_file *next;
@@ -44,6 +51,8 @@
 
 	/* points to the next uncommitted transaction. usually same as EOF. */
 	uoff_t sync_offset;
+	/* highest modseq at sync_offset */
+	uint64_t sync_highest_modseq;
 	/* saved_tail_offset is the offset that was last written to transaction
 	   log. max_tail_offset is what should be written to the log the next
 	   time a transaction is written. transaction log handling may update
@@ -51,6 +60,8 @@
 	   after the last saved offset (to avoid re-reading them unneededly). */
 	uoff_t saved_tail_offset, max_tail_offset;
 
+	struct modseq_cache modseq_cache[LOG_FILE_MODSEQ_CACHE_SIZE];
+
 	struct file_lock *file_lock;
 
 	unsigned int locked:1;
@@ -110,4 +121,13 @@
 int mail_transaction_log_lock_head(struct mail_transaction_log *log);
 void mail_transaction_log_file_unlock(struct mail_transaction_log_file *file);
 
+bool
+mail_transaction_header_has_modseq(const struct mail_transaction_header *hdr);
+int mail_transaction_log_file_get_highest_modseq_at(
+		struct mail_transaction_log_file *file,
+		uoff_t offset, uint64_t *highest_modseq_r);
+int mail_transaction_log_file_get_modseq_next_offset(
+		struct mail_transaction_log_file *file,
+		uint64_t modseq, uoff_t *next_offset_r);
+
 #endif
--- a/src/lib-index/mail-transaction-log-view.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-transaction-log-view.c	Wed Jun 11 14:35:15 2008 +0300
@@ -21,9 +21,19 @@
         struct mail_transaction_log_file *cur, *head, *tail;
 	uoff_t cur_offset;
 
+	/* modseq of the record returned previously with _view_next(). it's
+	   initialized with mail_transaction_log_file_get_highest_modseq_at()
+	   for the view's current position, and it's increased by one for
+	   each returned record that has a modseq. */
+	uint64_t prev_modseq;
+
 	uint32_t prev_file_seq;
 	uoff_t prev_file_offset;
 
+	struct mail_transaction_log_file *mark_file;
+	uoff_t mark_offset, mark_next_offset;
+	uint64_t mark_modseq;
+
 	unsigned int broken:1;
 };
 
@@ -289,6 +299,10 @@
 	view->max_file_offset = I_MIN(max_file_offset, view->head->sync_offset);
 	view->broken = FALSE;
 
+	if (mail_transaction_log_file_get_highest_modseq_at(view->cur,
+				view->cur_offset, &view->prev_modseq) < 0)
+		return -1;
+
 	i_assert(view->cur_offset <= view->cur->sync_offset);
 	return 1;
 }
@@ -307,12 +321,17 @@
 
 	view->cur = view->head = view->tail = NULL;
 
+	view->mark_file = NULL;
+	view->mark_offset = 0;
+	view->mark_modseq = 0;
+
 	view->min_file_seq = view->max_file_seq = 0;
 	view->min_file_offset = view->max_file_offset = 0;
 	view->cur_offset = 0;
 
 	view->prev_file_seq = 0;
 	view->prev_file_offset = 0;
+	view->prev_modseq = 0;
 }
 
 void
@@ -324,6 +343,12 @@
 	*file_offset_r = view->prev_file_offset;
 }
 
+uint64_t
+mail_transaction_log_view_get_prev_modseq(struct mail_transaction_log_view *view)
+{
+	return view->prev_modseq;
+}
+
 static bool
 mail_transaction_log_view_get_last(struct mail_transaction_log_view *view,
 				   struct mail_transaction_log_file **last_r,
@@ -589,6 +614,8 @@
 		ret = log_view_is_record_valid(file, hdr, data) ? 1 : -1;
 	} T_END;
 	if (ret > 0) {
+		if (mail_transaction_header_has_modseq(hdr))
+			view->prev_modseq++;
 		*hdr_r = hdr;
 		*data_r = data;
 		view->cur_offset += full_size;
@@ -631,24 +658,23 @@
 	return 1;
 }
 
-void mail_transaction_log_view_seek(struct mail_transaction_log_view *view,
-				    uint32_t seq, uoff_t offset)
+void mail_transaction_log_view_mark(struct mail_transaction_log_view *view)
 {
-	struct mail_transaction_log_file *file;
+	i_assert(view->cur->hdr.file_seq == view->prev_file_seq);
 
-	i_assert(seq >= view->min_file_seq && seq <= view->max_file_seq);
-	i_assert(seq != view->min_file_seq || offset >= view->min_file_offset);
-	i_assert(seq != view->max_file_seq || offset <= view->max_file_offset);
+	view->mark_file = view->cur;
+	view->mark_offset = view->prev_file_offset;
+	view->mark_next_offset = view->cur_offset;
+	view->mark_modseq = view->prev_modseq;
+}
 
-	if (view->cur == NULL || seq != view->cur->hdr.file_seq) {
-		for (file = view->tail; file != NULL; file = file->next) {
-			if (file->hdr.file_seq == seq)
-				break;
-		}
-		i_assert(file != NULL);
+void mail_transaction_log_view_rewind(struct mail_transaction_log_view *view)
+{
+	i_assert(view->mark_file != NULL);
 
-		view->cur = file;
-	}
-
-	view->cur_offset = offset;
+	view->cur = view->mark_file;
+	view->cur_offset = view->mark_next_offset;
+	view->prev_file_seq = view->cur->hdr.file_seq;
+	view->prev_file_offset = view->mark_offset;
+	view->prev_modseq = view->mark_modseq;
 }
--- a/src/lib-index/mail-transaction-log.h	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-index/mail-transaction-log.h	Wed Jun 11 14:35:15 2008 +0300
@@ -1,8 +1,11 @@
 #ifndef MAIL_TRANSACTION_LOG_H
 #define MAIL_TRANSACTION_LOG_H
 
+struct mail_index;
+struct mail_index_transaction;
+
 #define MAIL_TRANSACTION_LOG_MAJOR_VERSION 1
-#define MAIL_TRANSACTION_LOG_MINOR_VERSION 0
+#define MAIL_TRANSACTION_LOG_MINOR_VERSION 1
 #define MAIL_TRANSACTION_LOG_HEADER_MIN_SIZE 24
 
 struct mail_transaction_log_header {
@@ -15,6 +18,7 @@
 	uint32_t prev_file_seq;
 	uint32_t prev_file_offset;
 	uint32_t create_stamp;
+	uint64_t initial_modseq;
 };
 
 enum mail_transaction_type {
@@ -165,9 +169,11 @@
 int mail_transaction_log_view_next(struct mail_transaction_log_view *view,
 				   const struct mail_transaction_header **hdr_r,
 				   const void **data_r);
-/* Seek to given position within view. Must be inside the view's range. */
-void mail_transaction_log_view_seek(struct mail_transaction_log_view *view,
-				    uint32_t seq, uoff_t offset);
+/* Mark the current view's position to the record returned previously with
+   _log_view_next(). */
+void mail_transaction_log_view_mark(struct mail_transaction_log_view *view);
+/* Seek to previously marked position. */
+void mail_transaction_log_view_rewind(struct mail_transaction_log_view *view);
 
 /* Returns the position of the record returned previously with
    mail_transaction_log_view_next() */
@@ -175,6 +181,9 @@
 mail_transaction_log_view_get_prev_pos(struct mail_transaction_log_view *view,
 				       uint32_t *file_seq_r,
 				       uoff_t *file_offset_r);
+/* Return the modseq of the change returned previously with _view_next(). */
+uint64_t
+mail_transaction_log_view_get_prev_modseq(struct mail_transaction_log_view *view);
 /* Returns TRUE if we're at the end of the view window. */
 bool mail_transaction_log_view_is_last(struct mail_transaction_log_view *view);
 
--- a/src/lib-storage/index/index-fetch.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/lib-storage/index/index-fetch.c	Wed Jun 11 14:35:15 2008 +0300
@@ -52,12 +52,12 @@
 	const struct seq_range *uid_range;
 	unsigned int count;
 	const void *tdata;
-	uint32_t prev_seq, log_seq, min_uid, max_uid;
-	uoff_t prev_offset, log_offset;
+	uint32_t log_seq, min_uid, max_uid;
+	uoff_t log_offset;
 	bool reset;
 
-	if (!mail_index_modseq_get_log_offset(ibox->view, modseq,
-					      &log_seq, &log_offset))
+	if (!mail_index_modseq_get_next_log_offset(ibox->view, modseq,
+						   &log_seq, &log_offset))
 		return FALSE;
 	if (log_seq > ibox->view->log_file_head_seq ||
 	    (log_seq == ibox->view->log_file_head_seq &&
@@ -68,11 +68,9 @@
 
 	hdr = mail_index_get_header(ibox->view);
 	log_view = mail_transaction_log_view_open(ibox->index->log);
-	/* we can't trust user-given log offsets, so we have to start reading
-	   from the beginning of the log. */
-	if (mail_transaction_log_view_set(log_view, log_seq, 0,
+	if (mail_transaction_log_view_set(log_view, log_seq, log_offset,
 					  ibox->view->log_file_head_seq,
-					  ibox->view->log_file_head_offset,
+					  ibox->view->log_file_head_offset, 
 					  &reset) <= 0) {
 		mail_transaction_log_view_close(&log_view);
 		return FALSE;
@@ -88,15 +86,6 @@
 		if ((thdr->type & EXPUNGE_MASK) != EXPUNGE_MASK)
 			continue;
 
-		mail_transaction_log_view_get_prev_pos(log_view,
-						       &prev_seq, &prev_offset);
-		if (prev_seq < log_seq ||
-		    (prev_offset <= log_offset && prev_seq == log_seq)) {
-			/* still too old expunge. note that
-			   prev_offset==log_offset is also skipped. */
-			continue;
-		}
-
 		rec = tdata;
 		end = rec + thdr->size / sizeof(*rec);
 		for (; rec != end; rec++) {
--- a/src/util/logview.c	Wed Jun 11 14:28:54 2008 +0300
+++ b/src/util/logview.c	Wed Jun 11 14:35:15 2008 +0300
@@ -21,7 +21,7 @@
 		(((uint32_t)buf[0] & 0x7f) << 23);
 }
 
-static void dump_hdr(int fd)
+static void dump_hdr(int fd, uint64_t *modseq_r)
 {
 	struct mail_transaction_log_header hdr;
 	ssize_t ret;
@@ -43,8 +43,29 @@
 	printf("file seq = %u\n", hdr.file_seq);
 	printf("prev file = %u/%u\n", hdr.prev_file_seq, hdr.prev_file_offset);
 	printf("create stamp = %u\n", hdr.create_stamp);
+	printf("initial modseq = %llu\n",
+	       (unsigned long long)hdr.initial_modseq);
+	*modseq_r = I_MAX(hdr.initial_modseq, 1);
 }
 
+static bool
+mail_transaction_header_has_modseq(const struct mail_transaction_header *hdr)
+{
+	switch (hdr->type & MAIL_TRANSACTION_TYPE_MASK) {
+	case MAIL_TRANSACTION_EXPUNGE | MAIL_TRANSACTION_EXPUNGE_PROT:
+		if ((hdr->type & MAIL_TRANSACTION_EXTERNAL) == 0) {
+			/* ignore expunge requests */
+			break;
+		}
+	case MAIL_TRANSACTION_APPEND:
+	case MAIL_TRANSACTION_FLAG_UPDATE:
+	case MAIL_TRANSACTION_KEYWORD_UPDATE:
+	case MAIL_TRANSACTION_KEYWORD_RESET:
+		/* these changes increase modseq */
+		return TRUE;
+	}
+	return FALSE;
+}
 static const char *log_record_type(unsigned int type)
 {
 	const char *name;
@@ -235,7 +256,7 @@
 	}
 }
 
-static int dump_record(int fd)
+static int dump_record(int fd, uint64_t *modseq)
 {
 	off_t offset;
 	ssize_t ret;
@@ -262,8 +283,13 @@
 		return 0;
 	}
 
-	printf("record: offset=%"PRIuUOFF_T", type=%s, size=%u\n",
+	printf("record: offset=%"PRIuUOFF_T", type=%s, size=%u",
 	       offset, log_record_type(hdr.type), hdr.size);
+	if (mail_transaction_header_has_modseq(&hdr)) {
+		*modseq += 1;
+		printf(", modseq=%llu", (unsigned long long)*modseq);
+	}
+	printf("\n");
 
 	if (hdr.size < 1024*1024) {
 		unsigned char *buf = t_malloc(hdr.size);
@@ -282,6 +308,7 @@
 
 int main(int argc, const char *argv[])
 {
+	uint64_t modseq;
 	int fd, ret;
 
 	lib_init();
@@ -295,10 +322,10 @@
 		return 1;
 	}
 
-	dump_hdr(fd);
+	dump_hdr(fd, &modseq);
 	do {
 		T_BEGIN {
-			ret = dump_record(fd);
+			ret = dump_record(fd, &modseq);
 		} T_END;
 	} while (ret > 0);
 	return 0;