changeset 2277:41e56f28d085 HEAD

Cache updating is now done by first reserving space to write to, and then writing to it whenever the buffer gets full. Because of this there are no persistent cache file locks anymore, but it also means that the same cached field may be written to the file multiple times by different processes. Also, since we initially reserve more space than we actually need, some space can be wasted if multiple processes are updating the cache.
author Timo Sirainen <tss@iki.fi>
date Sun, 04 Jul 2004 23:00:47 +0300
parents 5f374049abdb
children 675ef825829e
files src/lib-index/mail-cache-compress.c src/lib-index/mail-cache-lookup.c src/lib-index/mail-cache-private.h src/lib-index/mail-cache-transaction.c src/lib-index/mail-cache.c src/lib-index/mail-cache.h src/lib-index/mail-index-private.h src/lib-index/mail-index-sync-update.c src/lib-index/mail-index-transaction-private.h src/lib-index/mail-index-transaction.c src/lib-storage/index/index-mail-headers.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-mail.h src/lib-storage/index/index-transaction.c
diffstat 14 files changed, 658 insertions(+), 478 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-index/mail-cache-compress.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache-compress.c	Sun Jul 04 23:00:47 2004 +0300
@@ -166,7 +166,7 @@
 
 	ret = 0;
 	for (seq = 1; seq <= message_count; seq++) {
-		cache_rec = mail_cache_lookup(cache_view, seq, 0);
+		cache_rec = mail_cache_lookup(cache_view, seq);
 		if (cache_rec == NULL)
 			continue;
 
@@ -181,7 +181,7 @@
 		if (keep_fields == cached_fields &&
 		    cache_rec->prev_offset == 0) {
 			/* just one unmodified block, save it */
-                        mail_index_update_cache(t, seq, output->offset);
+                        mail_index_update_cache(t, seq, output->offset, NULL);
 			o_stream_send(output, cache_rec, cache_rec->size);
 
 			if ((cache_rec->size & 3) != 0) {
@@ -191,7 +191,7 @@
 		} else {
 			/* a) dropping fields
 			   b) multiple blocks, sort them into buffer */
-                        mail_index_update_cache(t, seq, output->offset);
+                        mail_index_update_cache(t, seq, output->offset, NULL);
 
 			t_push();
 			cache_rec = mail_cache_compress_record(cache_view, seq,
@@ -237,9 +237,7 @@
 {
 	int fd, ret, locked;
 
-	i_assert(cache->trans_ctx == NULL);
-
-	if ((ret = mail_cache_lock(cache, TRUE)) < 0)
+	if ((ret = mail_cache_lock(cache)) < 0)
 		return -1;
 	locked = ret > 0;
 
--- a/src/lib-index/mail-cache-lookup.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache-lookup.c	Sun Jul 04 23:00:47 2004 +0300
@@ -10,7 +10,7 @@
 mail_cache_get_header_fields_str(struct mail_cache *cache, unsigned int idx)
 {
 	uint32_t offset, data_size;
-	unsigned char *buf;
+	const unsigned char *buf;
 
 	if (MAIL_CACHE_IS_UNUSABLE(cache))
 		return NULL;
@@ -147,19 +147,16 @@
 	return cache_rec;
 }
 
-int mail_cache_lookup_offset(struct mail_cache_view *view, uint32_t seq,
-			     uint32_t *offset_r, int skip_expunged)
+static int mail_cache_lookup_offset(struct mail_cache_view *view, uint32_t seq,
+				    uint32_t *offset_r)
 {
 	const struct mail_index_record *rec;
 	struct mail_index_map *map;
 	int i, ret;
 
 	for (i = 0; i < 2; i++) {
-		ret = mail_index_lookup_full(view->view, seq, &map, &rec);
-		if (ret < 0)
+		if (mail_index_lookup_full(view->view, seq, &map, &rec) < 0)
 			return -1;
-		if (ret == 0 && skip_expunged)
-			return 0;
 
 		if (map->hdr->cache_file_seq == view->cache->hdr->file_seq) {
 			*offset_r = rec->cache_offset;
@@ -174,18 +171,16 @@
 }
 
 struct mail_cache_record *
-mail_cache_lookup(struct mail_cache_view *view, uint32_t seq,
-		  enum mail_cache_field fields)
+mail_cache_lookup(struct mail_cache_view *view, uint32_t seq)
 {
 	uint32_t offset;
 
-	if (mail_cache_transaction_autocommit(view, seq, fields) < 0)
+	// FIXME: check transactions too
+
+        if (MAIL_CACHE_IS_UNUSABLE(view->cache))
 		return NULL;
 
-	if (MAIL_CACHE_IS_UNUSABLE(view->cache))
-		return NULL;
-
-	if (mail_cache_lookup_offset(view, seq, &offset, FALSE) <= 0)
+	if (mail_cache_lookup_offset(view, seq, &offset) <= 0)
 		return NULL;
 
 	return mail_cache_get_record(view->cache, offset);
@@ -197,7 +192,7 @@
 	struct mail_cache_record *cache_rec;
         enum mail_cache_field fields = 0;
 
-	cache_rec = mail_cache_lookup(view, seq, 0);
+	cache_rec = mail_cache_lookup(view, seq);
 	while (cache_rec != NULL) {
 		fields |= cache_rec->fields;
 		cache_rec = mail_cache_get_record(view->cache,
@@ -271,7 +266,7 @@
 
 	mail_cache_handle_decisions(view, seq, field);
 
-	cache_rec = mail_cache_lookup(view, seq, field);
+	cache_rec = mail_cache_lookup(view, seq);
 	while (cache_rec != NULL) {
 		if ((cache_rec->fields & field) != 0) {
 			return cache_get_field(view->cache, cache_rec, field,
--- a/src/lib-index/mail-cache-private.h	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache-private.h	Sun Jul 04 23:00:47 2004 +0300
@@ -9,6 +9,9 @@
 /* Never compress the file if it's smaller than this */
 #define COMPRESS_MIN_SIZE (1024*50)
 
+/* Don't bother remembering holes smaller than this */
+#define MAIL_CACHE_MIN_HOLE_SIZE 1024
+
 /* Compress the file when deleted space reaches n% of total size */
 #define COMPRESS_PERCENTAGE 20
 
@@ -55,6 +58,7 @@
 
 	uint32_t continued_record_count;
 
+	uint32_t hole_offset;
 	uint32_t used_file_size;
 	uint32_t deleted_space;
 
@@ -70,6 +74,17 @@
 	uint32_t size; /* full record size, including this header */
 };
 
+struct mail_cache_hole_header {
+	uint32_t next_offset; /* 0 if no holes left */
+	uint32_t size; /* including this header */
+
+	/* make sure we notice if we're treating hole as mail_cache_record.
+	   magic is a large number so if it's treated as size field, it'll
+	   point outside the file */
+#define MAIL_CACHE_HOLE_HEADER_MAGIC 0xffeedeff
+	uint32_t magic;
+};
+
 struct mail_cache {
 	struct mail_index *index;
 
@@ -79,7 +94,8 @@
 	void *mmap_base;
 	size_t mmap_length;
 
-	struct mail_cache_header *hdr;
+	const struct mail_cache_header *hdr;
+	struct mail_cache_header hdr_copy;
 
 	pool_t split_header_pool;
 	uint32_t split_offsets[MAIL_CACHE_HEADERS_COUNT];
@@ -90,10 +106,9 @@
 
 	uint32_t field_usage_uid_highwater[32];
 
-        struct mail_cache_transaction_ctx *trans_ctx;
-	unsigned int locks;
-
+	unsigned int locked:1;
 	unsigned int need_compress:1;
+	unsigned int hdr_modified:1;
 };
 
 struct mail_cache_view {
@@ -110,6 +125,11 @@
 uint32_t mail_cache_offset_to_uint32(uint32_t offset);
 unsigned int mail_cache_field_index(enum mail_cache_field field);
 
+/* Explicitly lock the cache file. Returns -1 if error, 1 if ok, 0 if we
+   couldn't lock */
+int mail_cache_lock(struct mail_cache *cache);
+void mail_cache_unlock(struct mail_cache *cache);
+
 const char *
 mail_cache_get_header_fields_str(struct mail_cache *cache, unsigned int idx);
 const char *const *
@@ -118,22 +138,21 @@
 struct mail_cache_record *
 mail_cache_get_record(struct mail_cache *cache, uint32_t offset);
 
-int mail_cache_lookup_offset(struct mail_cache_view *view, uint32_t seq,
-			     uint32_t *offset, int skip_expunged);
 struct mail_cache_record *
-mail_cache_lookup(struct mail_cache_view *view, uint32_t seq,
-		  enum mail_cache_field fields);
+mail_cache_lookup(struct mail_cache_view *view, uint32_t seq);
 
-int
-mail_cache_transaction_autocommit(struct mail_cache_view *view,
-				  uint32_t seq, enum mail_cache_field fields);
+int mail_cache_transaction_commit(struct mail_cache_transaction_ctx *ctx);
+void mail_cache_transaction_rollback(struct mail_cache_transaction_ctx *ctx);
 
 int mail_cache_map(struct mail_cache *cache, size_t offset, size_t size);
 void mail_cache_file_close(struct mail_cache *cache);
 int mail_cache_reopen(struct mail_cache *cache);
 
+/* Update new_offset's prev_offset field to old_offset. */
 int mail_cache_link(struct mail_cache *cache, uint32_t old_offset,
 		    uint32_t new_offset);
+/* Mark record in given offset to be deleted. */
+int mail_cache_delete(struct mail_cache *cache, uint32_t offset);
 
 void mail_cache_handle_decisions(struct mail_cache_view *view, uint32_t seq,
 				 enum mail_cache_field field);
--- a/src/lib-index/mail-cache-transaction.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache-transaction.c	Sun Jul 04 23:00:47 2004 +0300
@@ -3,8 +3,10 @@
 #include "lib.h"
 #include "buffer.h"
 #include "file-set-size.h"
-#include "mmap-util.h"
+#include "read-full.h"
+#include "write-full.h"
 #include "mail-cache-private.h"
+#include "mail-index-transaction-private.h"
 
 #include <stddef.h>
 #include <sys/stat.h>
@@ -14,149 +16,69 @@
 	struct mail_cache_view *view;
 	struct mail_index_transaction *trans;
 
+	uint32_t update_header_offsets[MAIL_CACHE_HEADERS_COUNT];
 	unsigned int next_unused_header_lowwater;
 
-	struct mail_cache_record cache_rec;
-	buffer_t *cache_data;
+	buffer_t *cache_data, *cache_data_seq;
+	uint32_t prev_seq;
+	size_t prev_pos;
 
-	uint32_t first_seq, last_seq, prev_seq;
-	enum mail_cache_field prev_fields;
-	buffer_t *cache_marks;
-	uint32_t used_file_size;
+        buffer_t *reservations;
+	uint32_t reserved_space_offset, reserved_space;
+	uint32_t last_grow_size;
+
+	uint32_t first_seq, last_seq;
+	enum mail_cache_field fields;
+
+	unsigned int changes:1;
 };
 
 static const unsigned char *null4[] = { 0, 0, 0, 0 };
 
-int mail_cache_transaction_begin(struct mail_cache_view *view, int nonblock,
-				 struct mail_index_transaction *t,
-				 struct mail_cache_transaction_ctx **ctx_r)
+struct mail_cache_transaction_ctx *
+mail_cache_get_transaction(struct mail_cache_view *view,
+			   struct mail_index_transaction *t)
 {
-        struct mail_cache_transaction_ctx *ctx;
-	int ret;
+	struct mail_cache_transaction_ctx *ctx;
 
-	i_assert(view->cache->trans_ctx == NULL);
-
-	ret = mail_cache_lock(view->cache, nonblock);
-	if (ret <= 0)
-		return ret;
+	if (t->cache_trans_ctx != NULL)
+		return t->cache_trans_ctx;
 
 	ctx = i_new(struct mail_cache_transaction_ctx, 1);
 	ctx->cache = view->cache;
 	ctx->view = view;
 	ctx->trans = t;
-	ctx->cache_data = buffer_create_dynamic(system_pool, 8192, (size_t)-1);
-	ctx->used_file_size = ctx->cache->hdr->used_file_size;
+	ctx->cache_data =
+		buffer_create_dynamic(system_pool, 32768, (size_t)-1);
+	ctx->cache_data_seq =
+		buffer_create_dynamic(system_pool, 256, (size_t)-1);
+	ctx->reservations =
+		buffer_create_dynamic(system_pool, 256, (size_t)-1);
 
-	view->cache->trans_ctx = ctx;
-	*ctx_r = ctx;
-	return 1;
+	t->cache_trans_ctx = ctx;
+	return ctx;
 }
 
-void mail_cache_transaction_end(struct mail_cache_transaction_ctx *ctx)
+static void mail_cache_transaction_free(struct mail_cache_transaction_ctx *ctx)
 {
-	i_assert(ctx->cache->trans_ctx != NULL);
-
-	(void)mail_cache_transaction_rollback(ctx);
-	mail_cache_unlock(ctx->cache);
-
-	ctx->cache->trans_ctx = NULL;
-
-	if (ctx->cache_marks != NULL)
-		buffer_free(ctx->cache_marks);
 	buffer_free(ctx->cache_data);
+	buffer_free(ctx->cache_data_seq);
+	buffer_free(ctx->reservations);
 	i_free(ctx);
 }
 
-static void mail_cache_transaction_flush(struct mail_cache_transaction_ctx *ctx)
-{
-	memset(&ctx->cache_rec, 0, sizeof(ctx->cache_rec));
-
-	ctx->next_unused_header_lowwater = 0;
-	ctx->first_seq = ctx->last_seq = ctx->prev_seq = 0;
-	ctx->prev_fields = 0;
-
-	if (ctx->cache_marks != NULL)
-		buffer_set_used_size(ctx->cache_marks, 0);
-	buffer_set_used_size(ctx->cache_data, 0);
-}
-
-static void mark_update(buffer_t **buf, uint32_t offset, uint32_t data)
-{
-	if (*buf == NULL)
-		*buf = buffer_create_dynamic(system_pool, 1024, (size_t)-1);
-
-	buffer_append(*buf, &offset, sizeof(offset));
-	buffer_append(*buf, &data, sizeof(data));
-}
-
-static int write_mark_updates(struct mail_cache *cache)
-{
-	const uint32_t *data, *end;
-	size_t size;
-
-	data = buffer_get_data(cache->trans_ctx->cache_marks, &size);
-	end = data + size/sizeof(uint32_t);
-
-	while (data < end) {
-		if (pwrite(cache->fd, data+1, sizeof(*data), data[0]) < 0) {
-			mail_cache_set_syscall_error(cache, "pwrite()");
-			return -1;
-		}
-		data += 2;
-	}
-	return 0;
-}
-
-static int commit_all_changes(struct mail_cache_transaction_ctx *ctx)
+static int mail_cache_grow_file(struct mail_cache *cache, size_t size)
 {
-	struct mail_cache *cache = ctx->cache;
-
-	/* write everything to disk */
-	if (msync(cache->mmap_base, cache->mmap_length, MS_SYNC) < 0) {
-		mail_cache_set_syscall_error(cache, "msync()");
-		return -1;
-	}
-
-	if (fdatasync(cache->fd) < 0) {
-		mail_cache_set_syscall_error(cache, "fdatasync()");
-		return -1;
-	}
-
-	if (ctx->cache_marks == NULL ||
-	    buffer_get_used_size(ctx->cache_marks) == 0)
-		return 0;
-
-	/* now that we're sure it's written, set on all the used-bits */
-	if (write_mark_updates(cache) < 0)
-		return -1;
+	struct stat st;
+	uoff_t new_fsize, grow_size;
 
-	/* update continued records count */
-        cache->hdr->continued_record_count +=
-		buffer_get_used_size(ctx->cache_marks) /
-		(sizeof(uint32_t) * 2);
+	i_assert(cache->locked);
 
-	if (cache->hdr->continued_record_count * 100 /
-	    cache->index->hdr->messages_count >=
-	    COMPRESS_CONTINUED_PERCENTAGE &&
-	    ctx->used_file_size >= COMPRESS_MIN_SIZE) {
-		/* too many continued rows, compress */
-		cache->need_compress = TRUE;
-	}
-	return 0;
-}
-
-static int
-mail_cache_grow(struct mail_cache_transaction_ctx *ctx, uint32_t size)
-{
-        struct mail_cache *cache = ctx->cache;
-	struct stat st;
-	uoff_t grow_size, new_fsize;
-
-	new_fsize = ctx->used_file_size + size;
+	/* grow the file */
+	new_fsize = cache->hdr_copy.used_file_size + size;
 	grow_size = new_fsize / 100 * MAIL_CACHE_GROW_PERCENTAGE;
 	if (grow_size < 16384)
 		grow_size = 16384;
-
 	new_fsize += grow_size;
 	new_fsize &= ~1023;
 
@@ -165,128 +87,413 @@
 		return -1;
 	}
 
-	if (ctx->used_file_size + size <= (uoff_t)st.st_size) {
-		/* no need to grow, just update mmap */
-		if (mail_cache_map(cache, 0, (size_t)st.st_size) < 0)
+	if ((uoff_t)st.st_size < new_fsize) {
+		if (file_set_size(cache->fd, new_fsize) < 0) {
+			mail_cache_set_syscall_error(cache, "file_set_size()");
 			return -1;
+		}
+	}
+	return 0;
+}
 
-		i_assert(cache->mmap_length >= (uoff_t)st.st_size);
+static int mail_cache_unlink_hole(struct mail_cache *cache, size_t size,
+				  struct mail_cache_hole_header *hole_r)
+{
+	struct mail_cache_header *hdr = &cache->hdr_copy;
+	struct mail_cache_hole_header hole;
+	uint32_t offset, prev_offset;
+
+	i_assert(cache->locked);
+
+	offset = hdr->hole_offset; prev_offset = 0;
+	while (offset != 0) {
+		if (pread_full(cache->fd, &hole, sizeof(hole), offset) <= 0) {
+			mail_cache_set_syscall_error(cache, "pread_full()");
+			return FALSE;
+		}
+
+		if (hole.magic != MAIL_CACHE_HOLE_HEADER_MAGIC) {
+			mail_cache_set_corrupted(cache,
+				"Invalid magic in hole header");
+			return FALSE;
+		}
+
+		if (hole.size >= size)
+			break;
+		offset = hole.next_offset;
+	}
+	if (offset == 0)
+		return FALSE;
+
+	if (prev_offset == 0)
+		hdr->hole_offset = hole.next_offset;
+	else {
+		if (pwrite_full(cache->fd, &hole.next_offset,
+				sizeof(hole.next_offset), prev_offset) < 0) {
+			mail_cache_set_syscall_error(cache, "pwrite_full()");
+			return FALSE;
+		}
+	}
+	hdr->deleted_space -= hole.size;
+
+	hole_r->next_offset = offset;
+	hole_r->size = hole.size;
+	return TRUE;
+}
+
+static void
+mail_cache_transaction_add_reservation(struct mail_cache_transaction_ctx *ctx)
+{
+	buffer_append(ctx->reservations, &ctx->reserved_space_offset,
+		      sizeof(ctx->reserved_space_offset));
+	buffer_append(ctx->reservations, &ctx->reserved_space,
+		      sizeof(ctx->reserved_space));
+}
+
+static int
+mail_cache_transaction_reserve_more(struct mail_cache_transaction_ctx *ctx,
+				    size_t size, int commit)
+{
+	struct mail_cache *cache = ctx->cache;
+	struct mail_cache_header *hdr = &cache->hdr_copy;
+	struct mail_cache_hole_header hole;
+	uint32_t *buf;
+
+	i_assert(cache->locked);
+
+	if (mail_cache_unlink_hole(cache, size, &hole)) {
+		/* found a large enough hole. */
+		ctx->reserved_space_offset = hole.next_offset;
+		ctx->reserved_space = hole.size;
+		mail_cache_transaction_add_reservation(ctx);
 		return 0;
 	}
 
-	if (file_set_size(cache->fd, (off_t)new_fsize) < 0) {
-		mail_cache_set_syscall_error(cache, "file_set_size()");
+	if (MAIL_CACHE_IS_UNUSABLE(cache)) {
+		/* mail_cache_unlink_hole() could have noticed corruption */
+		return -1;
+	}
+
+	if ((uoff_t)hdr->used_file_size + size > (uint32_t)-1) {
+		mail_index_set_error(cache->index, "Cache file too large: %s",
+				     cache->filepath);
 		return -1;
 	}
 
-	return mail_cache_map(cache, 0, (size_t)new_fsize);
+	if (!commit) {
+		size = (size + ctx->last_grow_size) * 2;
+		if ((uoff_t)hdr->used_file_size + size > (uint32_t)-1)
+			size = (uint32_t)-1;
+		ctx->last_grow_size = size;
+	}
+
+	if (mail_cache_grow_file(ctx->cache, size) < 0)
+		return -1;
+
+	if (ctx->reserved_space_offset + ctx->reserved_space ==
+	    hdr->used_file_size) {
+		/* we can simply grow it */
+		ctx->reserved_space = size - ctx->reserved_space;
+
+		/* grow reservation. it's probably the last one in the buffer,
+		   but it's not guaranteed because we might have used holes
+		   as well */
+		buf = buffer_get_modifyable_data(ctx->reservations, &size);
+		size /= sizeof(uint32_t);
+		i_assert(size >= 2);
+
+		do {
+			size -= 2;
+			if (buf[size] == ctx->reserved_space_offset) {
+				buf[size+1] = ctx->reserved_space;
+				break;
+			}
+		} while (size >= 2);
+	} else {
+		ctx->reserved_space_offset = hdr->used_file_size;
+		ctx->reserved_space = size;
+		mail_cache_transaction_add_reservation(ctx);
+	}
+
+	cache->hdr_modified = TRUE;
+	hdr->used_file_size = ctx->reserved_space_offset + ctx->reserved_space;
+	return 0;
 }
 
-static uint32_t mail_cache_append_space(struct mail_cache_transaction_ctx *ctx,
-					uint32_t size)
+static void
+mail_cache_free_space(struct mail_cache *cache, uint32_t offset, uint32_t size)
 {
-	/* NOTE: must be done within transaction or rollback would break it */
-	uint32_t offset;
+	struct mail_cache_hole_header hole;
+
+	i_assert(cache->locked);
+
+	if (offset + size == cache->hdr_copy.used_file_size) {
+		/* we can just set used_file_size back */
+		cache->hdr_modified = TRUE;
+		cache->hdr_copy.used_file_size = offset;
+	} else if (size >= MAIL_CACHE_MIN_HOLE_SIZE) {
+		/* set it up as a hole */
+		hole.next_offset = cache->hdr_copy.hole_offset;
+		hole.size = size;
+		hole.magic = MAIL_CACHE_HOLE_HEADER_MAGIC;
 
-	i_assert((size & 3) == 0);
+		if (pwrite_full(cache->fd, &hole, sizeof(hole), offset) < 0) {
+			mail_cache_set_syscall_error(cache, "pwrite_full()");
+			return;
+		}
+
+		cache->hdr_copy.deleted_space += size;
+		cache->hdr_copy.hole_offset = offset;
+		cache->hdr_modified = TRUE;
+	}
+}
 
-	offset = ctx->used_file_size;
-	if (offset >= 0x40000000) {
-		mail_index_set_error(ctx->cache->index,
-				     "Cache file too large: %s",
-				     ctx->cache->filepath);
-		return 0;
+static void
+mail_cache_transaction_free_space(struct mail_cache_transaction_ctx *ctx)
+{
+	int locked = ctx->cache->locked;
+
+	if (ctx->reserved_space == 0)
+		return;
+
+	if (!locked) {
+		if (mail_cache_lock(ctx->cache) <= 0)
+			return;
 	}
 
-	if (offset + size > ctx->cache->mmap_length) {
-		if (mail_cache_grow(ctx, size) < 0)
+	mail_cache_free_space(ctx->cache, ctx->reserved_space_offset,
+			      ctx->reserved_space);
+
+	if (!locked)
+		mail_cache_unlock(ctx->cache);
+}
+
+static uint32_t
+mail_cache_transaction_get_space(struct mail_cache_transaction_ctx *ctx,
+				 size_t min_size, size_t max_size,
+				 size_t *available_space_r, int commit)
+{
+	int locked = ctx->cache->locked;
+	uint32_t offset;
+	size_t size;
+	int ret;
+
+	if (min_size > ctx->reserved_space) {
+		if (!locked) {
+			if (mail_cache_lock(ctx->cache) <= 0)
+				return -1;
+		}
+		ret = mail_cache_transaction_reserve_more(ctx, max_size,
+							  commit);
+		if (!locked)
+			mail_cache_unlock(ctx->cache);
+
+		if (ret < 0)
 			return 0;
+
+		size = max_size;
+	} else {
+		size = I_MIN(max_size, ctx->reserved_space);
 	}
 
-	ctx->used_file_size += size;
+	offset = ctx->reserved_space_offset;
+	ctx->reserved_space_offset += size;
+	ctx->reserved_space -= size;
+	if (available_space_r != NULL)
+		*available_space_r = size;
+
+	if (size == max_size && commit) {
+		/* final commit - see if we can free the rest of the
+		   reserved space */
+		mail_cache_transaction_free_space(ctx);
+	}
+
 	return offset;
 }
 
-static int mail_cache_write(struct mail_cache_transaction_ctx *ctx)
+static int
+mail_cache_transaction_flush(struct mail_cache_transaction_ctx *ctx)
 {
 	struct mail_cache *cache = ctx->cache;
-	uint32_t offset, write_offset;
-	const void *buf;
-	size_t size, buf_size;
-	int ret;
-
-	buf = buffer_get_data(ctx->cache_data, &buf_size);
-
-	size = sizeof(ctx->cache_rec) + buf_size;
-	ctx->cache_rec.size = size;
-
-        ret = mail_cache_lookup_offset(ctx->view, ctx->prev_seq, &offset, TRUE);
-	if (ret < 0)
-		return -1;
+	const struct mail_cache_record *rec, *tmp_rec;
+	const uint32_t *seq;
+	uint32_t write_offset, old_offset, rec_offset;
+	size_t size, max_size, seq_idx, seq_limit, seq_count;
+	int commit;
 
-	if (ret == 0) {
-		/* it's been expunged already, do nothing */
-	} else {
-		write_offset = mail_cache_append_space(ctx, size);
-		if (write_offset == 0)
-			return -1;
-
-		/* write the offset to index file. this record's prev_offset
-		   is updated to point to old cache record when index is
-		   being synced. */
-		mail_index_update_cache(ctx->trans, ctx->prev_seq, write_offset);
-
-		memcpy((char *) cache->mmap_base + write_offset,
-		       &ctx->cache_rec, sizeof(ctx->cache_rec));
-		memcpy((char *) cache->mmap_base + write_offset +
-		       sizeof(ctx->cache_rec), buf, buf_size);
+	commit = ctx->prev_seq == 0;
+	if (commit) {
+		/* committing, remove the last dummy record */
+		buffer_set_used_size(ctx->cache_data, ctx->prev_pos);
 	}
 
-	/* reset the write context */
-	ctx->prev_seq = 0;
-	ctx->prev_fields = 0;
+	rec = buffer_get_data(ctx->cache_data, &size);
+	i_assert(ctx->prev_pos <= size);
+
+	seq = buffer_get_data(ctx->cache_data_seq, &seq_count);
+	seq_count /= sizeof(*seq);
+
+	for (seq_idx = 0, rec_offset = 0; rec_offset < ctx->prev_pos;) {
+		max_size = ctx->prev_pos - rec_offset;
+		write_offset = mail_cache_transaction_get_space(ctx, rec->size,
+								max_size,
+								&max_size,
+								commit);
+		if (write_offset == 0) {
+			/* nothing to write / error */
+			return ctx->prev_pos == 0 ? 0 : -1;
+		}
+
+		if (max_size < ctx->prev_pos) {
+			/* see how much we can really write there */
+			tmp_rec = rec;
+			for (size = 0; size + tmp_rec->size <= max_size; ) {
+				seq_limit++;
+				size += tmp_rec->size;
+				tmp_rec = CONST_PTR_OFFSET(tmp_rec,
+							   tmp_rec->size);
+			}
+			max_size = size;
+		} else {
+			seq_limit = seq_count;
+		}
+
+		/* write it to file */
+		if (pwrite_full(cache->fd, rec, max_size, write_offset) < 0) {
+			mail_cache_set_syscall_error(cache, "pwrite_full()");
+			return -1;
+		}
 
-	memset(&ctx->cache_rec, 0, sizeof(ctx->cache_rec));
-	buffer_set_used_size(ctx->cache_data, 0);
+		/* write the cache_offsets to index file. records' prev_offset
+		   is updated to point to old cache record when index is being
+		   synced. */
+		for (; seq_idx < seq_limit; seq_idx++) {
+			mail_index_update_cache(ctx->trans, seq[seq_idx],
+						write_offset, &old_offset);
+			if (old_offset != 0) {
+				/* we added records for this message multiple
+				   times in this same uncommitted transaction.
+				   only the new one will be written to
+				   transaction log, we need to do the linking
+				   ourself here. */
+				if (mail_cache_link(cache, old_offset,
+						    write_offset) < 0)
+					return -1;
+			}
+
+			write_offset += rec->size;
+			rec_offset += rec->size;
+			rec = CONST_PTR_OFFSET(rec, rec->size);
+		}
+	}
+
+	/* drop the written data from buffer */
+	buffer_copy(ctx->cache_data, 0,
+		    ctx->cache_data, ctx->prev_pos, (size_t)-1);
+	buffer_set_used_size(ctx->cache_data, size - ctx->prev_pos);
+
+	buffer_set_used_size(ctx->cache_data_seq, 0);
 	return 0;
 }
 
+static void
+mail_cache_transaction_switch_seq(struct mail_cache_transaction_ctx *ctx)
+{
+	struct mail_cache_record *rec, new_rec;
+	void *data;
+	size_t size;
+
+	if (ctx->prev_seq != 0) {
+		/* fix record size */
+		data = buffer_get_modifyable_data(ctx->cache_data, &size);
+		rec = PTR_OFFSET(data, ctx->prev_pos);
+		rec->size = size - ctx->prev_pos;
+
+		buffer_append(ctx->cache_data_seq, &ctx->prev_seq,
+			      sizeof(ctx->prev_seq));
+		ctx->prev_pos = size;
+	}
+
+	memset(&new_rec, 0, sizeof(new_rec));
+	buffer_append(ctx->cache_data, &new_rec, sizeof(new_rec));
+
+	ctx->prev_seq = 0;
+	ctx->changes = TRUE;
+}
+
 int mail_cache_transaction_commit(struct mail_cache_transaction_ctx *ctx)
 {
-	int ret = 0;
+	struct mail_cache *cache = ctx->cache;
+	uint32_t offset;
+	int i, ret = 0;
 
-	if (MAIL_CACHE_IS_UNUSABLE(ctx->cache)) {
-		mail_cache_transaction_flush(ctx);
+	if (!ctx->changes) {
+		mail_cache_transaction_free(ctx);
 		return 0;
 	}
 
-	if (ctx->prev_seq != 0) {
-		if (mail_cache_write(ctx) < 0)
-			return -1;
+	if (mail_cache_lock(cache) <= 0) {
+		mail_cache_transaction_rollback(ctx);
+		return -1;
 	}
 
-	ctx->cache->hdr->used_file_size = ctx->used_file_size;
+	if (ctx->prev_seq != 0)
+                mail_cache_transaction_switch_seq(ctx);
+
+	if (mail_cache_transaction_flush(ctx) < 0)
+		ret = -1;
+
+	/* make sure everything's written before updating offsets */
+	if (fdatasync(cache->fd) < 0) {
+		mail_cache_set_syscall_error(cache, "fdatasync()");
+		ret = -1;
+	}
 
-	if (commit_all_changes(ctx) < 0)
-		ret = -1;
+	if (ret == 0) {
+		for (i = 0; i < MAIL_CACHE_HEADERS_COUNT; i++) {
+			offset = ctx->update_header_offsets[i];
+			if (offset != 0) {
+				cache->hdr_copy.header_offsets[i] =
+					mail_cache_uint32_to_offset(offset);
+				cache->hdr_modified = TRUE;
+			}
+		}
+	}
+
+	mail_cache_unlock(cache);
 
 	if (ctx->next_unused_header_lowwater == MAIL_CACHE_HEADERS_COUNT) {
 		/* they're all used - compress the cache to get more */
-		ctx->cache->need_compress = TRUE;
+		cache->need_compress = TRUE;
 	}
 
-	mail_cache_transaction_flush(ctx);
+	mail_cache_transaction_free(ctx);
 	return ret;
 }
 
 void mail_cache_transaction_rollback(struct mail_cache_transaction_ctx *ctx)
 {
 	struct mail_cache *cache = ctx->cache;
+	const uint32_t *buf;
+	size_t size;
 	unsigned int i;
 
-	/* no need to actually modify the file - we just didn't update
-	   used_file_size */
-	ctx->used_file_size = cache->hdr->used_file_size;
+	mail_cache_transaction_free_space(ctx);
+
+	buf = buffer_get_data(ctx->reservations, &size);
+	i_assert(size % sizeof(uint32_t)*2 == 0);
+	size /= sizeof(*buf);
+
+	if (size > 0) {
+		/* free flushed data as well. do it from end to beginning so
+		   we have a better chance of updating used_file_size instead
+		   of adding holes */
+		do {
+			size -= 2;
+			mail_cache_free_space(ctx->cache, buf[size],
+					      buf[size+1]);
+		} while (size > 0);
+	}
 
 	/* make sure we don't cache the headers */
 	for (i = 0; i < ctx->next_unused_header_lowwater; i++) {
@@ -295,7 +502,7 @@
 			cache->split_offsets[i] = 1;
 	}
 
-	mail_cache_transaction_flush(ctx);
+	mail_cache_transaction_free(ctx);
 }
 
 static const char *write_header_string(const char *const headers[],
@@ -328,7 +535,7 @@
 				 unsigned int idx, const char *const headers[])
 {
 	struct mail_cache *cache = ctx->cache;
-	uint32_t offset, update_offset, size;
+	uint32_t offset, size, total_size;
 	const char *header_str, *prev_str;
 
 	i_assert(*headers != NULL);
@@ -350,27 +557,27 @@
 		i_assert(strcmp(header_str, prev_str) != 0);
 	}
 
-	offset = mail_cache_append_space(ctx, size + sizeof(uint32_t));
+	total_size = size + sizeof(uint32_t);
+	offset = mail_cache_transaction_get_space(ctx, total_size, total_size,
+						  NULL, FALSE);
 	if (offset != 0) {
-		memcpy(PTR_OFFSET(cache->mmap_base, offset + sizeof(uint32_t)),
-		       header_str, size);
+		if (pwrite_full(cache->fd, &size, sizeof(size), offset) < 0 ||
+		    pwrite_full(cache->fd, header_str, size,
+				offset + sizeof(uint32_t)) < 0) {
+			mail_cache_set_syscall_error(cache, "pwrite_full()");
+			offset = 0;
+		}
+	}
 
-		memcpy(PTR_OFFSET(cache->mmap_base, offset),
-		       &size, sizeof(uint32_t));
+	if (offset != 0) {
+		ctx->update_header_offsets[idx] = offset;
+		ctx->changes = TRUE;
 
 		/* update cached headers */
 		cache->split_offsets[idx] = cache->hdr->header_offsets[idx];
 		cache->split_headers[idx] =
 			mail_cache_split_header(cache, header_str);
 
-		/* mark used-bit to be updated later. not really needed for
-		   read-safety, but if transaction get rolled back we can't let
-		   this point to invalid location. */
-		update_offset = (char *) &cache->hdr->header_offsets[idx] -
-			(char *) cache->mmap_base;
-		mark_update(&ctx->cache_marks, update_offset,
-			    mail_cache_uint32_to_offset(offset));
-
 		/* make sure get_header_fields() still works for this header
 		   while the transaction isn't yet committed. */
 		ctx->next_unused_header_lowwater = idx + 1;
@@ -380,58 +587,50 @@
 	return offset > 0;
 }
 
-static size_t get_insert_offset(struct mail_cache_transaction_ctx *ctx,
-				enum mail_cache_field field)
+static size_t
+mail_cache_transaction_get_insert_pos(struct mail_cache_transaction_ctx *ctx,
+				      enum mail_cache_field field)
 {
-	const unsigned char *buf;
+	const struct mail_cache_record *cache_rec;
+	const void *data;
 	unsigned int mask;
 	uint32_t data_size;
-	size_t offset = 0;
+	size_t pos;
 	int i;
 
-	buf = buffer_get_data(ctx->cache_data, NULL);
+	data = buffer_get_data(ctx->cache_data, NULL);
+	cache_rec = CONST_PTR_OFFSET(data, ctx->prev_pos);
 
+	pos = ctx->prev_pos + sizeof(*cache_rec);
 	for (i = 0, mask = 1; i < 31; i++, mask <<= 1) {
 		if ((field & mask) != 0)
-			return offset;
+			return pos;
 
-		if ((ctx->cache_rec.fields & mask) != 0) {
+		if ((cache_rec->fields & mask) != 0) {
 			if ((mask & MAIL_CACHE_FIXED_MASK) != 0)
 				data_size = mail_cache_field_sizes[i];
 			else {
-				memcpy(&data_size, buf + offset,
+				memcpy(&data_size, CONST_PTR_OFFSET(data, pos),
 				       sizeof(data_size));
-				offset += sizeof(data_size);
+				pos += sizeof(data_size);
 			}
-			offset += (data_size + 3) & ~3;
+			pos += (data_size + 3) & ~3;
 		}
 	}
 
 	i_unreached();
-	return offset;
+	return pos;
 }
 
-static int get_field_num(enum mail_cache_field field)
+void mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq,
+		    enum mail_cache_field field,
+		    const void *data, size_t data_size)
 {
-	unsigned int mask;
-	int i;
-
-	for (i = 0, mask = 1; i < 31; i++, mask <<= 1) {
-		if ((field & mask) != 0)
-			return i;
-	}
-
-	return -1;
-}
-
-int mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq,
-		   enum mail_cache_field field,
-		   const void *data, size_t data_size)
-{
+	struct mail_cache_record *cache_rec;
+	unsigned char *buf;
+	size_t full_size, pos;
 	uint32_t data_size32;
-	size_t full_size, offset;
-	unsigned char *buf;
-	int field_num;
+	unsigned int field_idx;
 
 	i_assert(data_size > 0);
 	i_assert(data_size < (uint32_t)-1);
@@ -439,38 +638,47 @@
 	data_size32 = (uint32_t)data_size;
 
 	if ((field & MAIL_CACHE_FIXED_MASK) != 0) {
-		field_num = get_field_num(field);
-		i_assert(field_num != -1);
-		i_assert(mail_cache_field_sizes[field_num] == data_size);
+		field_idx = mail_cache_field_index(field);
+		i_assert(mail_cache_field_sizes[field_idx] == data_size);
 	} else if ((field & MAIL_CACHE_STRING_MASK) != 0) {
 		i_assert(((char *) data)[data_size-1] == '\0');
 	}
 
-	if (ctx->prev_seq != seq && ctx->prev_seq != 0) {
-		if (mail_cache_write(ctx) < 0)
-			return -1;
+	if (ctx->prev_seq != seq) {
+		mail_cache_transaction_switch_seq(ctx);
+		ctx->prev_seq = seq;
+
+		/* remember roughly what we have modified, so cache lookups can
+		   look into transactions to see changes. */
+		if (seq < ctx->first_seq || ctx->first_seq == 0)
+			ctx->first_seq = seq;
+		if (seq > ctx->last_seq)
+			ctx->last_seq = seq;
+		ctx->fields |= field;
 	}
-	ctx->prev_seq = seq;
-
-	i_assert((ctx->cache_rec.fields & field) == 0);
 
 	full_size = (data_size + 3) & ~3;
 	if ((field & MAIL_CACHE_FIXED_MASK) == 0)
 		full_size += sizeof(data_size32);
 
+	if (buffer_get_used_size(ctx->cache_data) + full_size >
+	    buffer_get_size(ctx->cache_data)) {
+		/* time to flush our buffer */
+		if (mail_cache_transaction_flush(ctx) < 0)
+			return;
+	}
+
 	/* fields must be ordered. find where to insert it. */
-	if (field > ctx->cache_rec.fields)
-                buf = buffer_append_space_unsafe(ctx->cache_data, full_size);
-	else {
-		offset = get_insert_offset(ctx, field);
-		buffer_copy(ctx->cache_data, offset + full_size,
-			    ctx->cache_data, offset, (size_t)-1);
-		buf = buffer_get_space_unsafe(ctx->cache_data,
-					      offset, full_size);
-	}
-	ctx->cache_rec.fields |= field;
+	pos = mail_cache_transaction_get_insert_pos(ctx, field);
+	buffer_copy(ctx->cache_data, pos + full_size,
+		    ctx->cache_data, pos, (size_t)-1);
+
+	cache_rec = buffer_get_space_unsafe(ctx->cache_data, ctx->prev_pos,
+					    sizeof(*cache_rec));
+	cache_rec->fields |= field;
 
 	/* @UNSAFE */
+	buf = buffer_get_space_unsafe(ctx->cache_data, pos, full_size);
 	if ((field & MAIL_CACHE_FIXED_MASK) == 0) {
 		memcpy(buf, &data_size32, sizeof(data_size32));
 		buf += sizeof(data_size32);
@@ -478,73 +686,6 @@
 	memcpy(buf, data, data_size); buf += data_size;
 	if ((data_size & 3) != 0)
 		memset(buf, 0, 4 - (data_size & 3));
-
-	/* remember the transaction sequence range */
-	if (seq < ctx->first_seq || ctx->first_seq == 0)
-		ctx->first_seq = seq;
-	if (seq > ctx->last_seq)
-		ctx->last_seq = seq;
-	ctx->prev_fields |= field;
-
-	return 0;
-}
-
-int mail_cache_delete(struct mail_cache_transaction_ctx *ctx, uint32_t seq)
-{
-	struct mail_cache *cache = ctx->cache;
-	struct mail_cache_record *cache_rec;
-	uint32_t deleted_space;
-	uoff_t max_del_space;
-
-	cache_rec = mail_cache_lookup(ctx->view, seq, 0);
-	if (cache_rec == NULL)
-		return 0;
-
-	/* we'll only update the deleted_space in header. we can't really
-	   do any actual deleting as other processes might still be using
-	   the data. also it's actually useful as some index views are still
-	   able to ask cached data from messages that have already been
-	   expunged. */
-	deleted_space = cache->hdr->deleted_space;
-
-	do {
-		deleted_space += cache_rec->size;
-		cache_rec =
-			mail_cache_get_record(cache, cache_rec->prev_offset);
-	} while (cache_rec != NULL);
-
-	/* see if we've reached the max. deleted space in file */
-	max_del_space = ctx->used_file_size / 100 * COMPRESS_PERCENTAGE;
-	if (deleted_space >= max_del_space &&
-	    ctx->used_file_size >= COMPRESS_MIN_SIZE)
-		cache->need_compress = TRUE;
-
-	cache->hdr->deleted_space = deleted_space;
-	return 0;
-}
-
-int
-mail_cache_transaction_autocommit(struct mail_cache_view *view,
-				  uint32_t seq, enum mail_cache_field fields)
-{
-	struct mail_cache *cache = view->cache;
-
-	if (cache->trans_ctx != NULL &&
-	    cache->trans_ctx->first_seq <= seq &&
-	    cache->trans_ctx->last_seq >= seq &&
-	    (cache->trans_ctx->prev_seq != seq || fields == 0 ||
-	     (cache->trans_ctx->prev_fields & fields) != 0)) {
-		/* write non-index changes */
-		if (cache->trans_ctx->prev_seq == seq) {
-			if (mail_cache_write(cache->trans_ctx) < 0)
-				return -1;
-		}
-
-		if (mail_cache_transaction_commit(cache->trans_ctx) < 0)
-			return -1;
-	}
-
-	return 0;
 }
 
 int mail_cache_update_record_flags(struct mail_cache_view *view, uint32_t seq,
@@ -556,18 +697,49 @@
 int mail_cache_link(struct mail_cache *cache, uint32_t old_offset,
 		    uint32_t new_offset)
 {
+	i_assert(cache->locked);
+
+	if (new_offset + sizeof(struct mail_cache_record) >
+	    cache->hdr_copy.used_file_size) {
+		mail_cache_set_corrupted(cache,
+			"Cache record offset %u points outside file",
+			new_offset);
+		return -1;
+	}
+
+	new_offset += offsetof(struct mail_cache_record, prev_offset);
+	if (pwrite_full(cache->fd, &old_offset,
+			sizeof(old_offset), new_offset) < 0) {
+		mail_cache_set_syscall_error(cache, "pwrite_full()");
+		return -1;
+	}
+
+	cache->hdr_copy.continued_record_count++;
+	cache->hdr_modified = TRUE;
+	return 0;
+}
+
+int mail_cache_delete(struct mail_cache *cache, uint32_t offset)
+{
 	struct mail_cache_record *cache_rec;
 
-	i_assert(cache->locks > 0);
+	i_assert(cache->locked);
 
-	if (mail_cache_map(cache, new_offset, sizeof(*cache_rec)) < 0)
-		return -1;
+	cache_rec = mail_cache_get_record(cache, offset);
+	if (cache_rec == NULL)
+		return 0;
 
-	if (new_offset + sizeof(*cache_rec) > cache->mmap_length) {
-		mail_cache_set_corrupted(cache, "record points outside file");
-		return -1;
-	}
-	cache_rec = CACHE_RECORD(cache, new_offset);
-	cache_rec->prev_offset = old_offset;
+	/* we'll only update the deleted_space in header. we can't really
+	   do any actual deleting as other processes might still be using
+	   the data. also it's actually useful as some index views are still
+	   able to ask cached data from messages that have already been
+	   expunged. */
+	do {
+		cache->hdr_copy.deleted_space += cache_rec->size;
+		cache_rec =
+			mail_cache_get_record(cache, cache_rec->prev_offset);
+	} while (cache_rec != NULL);
+
+	cache->hdr_modified = TRUE;
 	return 0;
 }
--- a/src/lib-index/mail-cache.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache.c	Sun Jul 04 23:00:47 2004 +0300
@@ -154,7 +154,7 @@
 
 static int mmap_verify_header(struct mail_cache *cache)
 {
-	struct mail_cache_header *hdr;
+	const struct mail_cache_header *hdr;
 
 	/* check that the header is still ok */
 	if (cache->mmap_length < sizeof(struct mail_cache_header)) {
@@ -174,13 +174,8 @@
 		return FALSE;
 	}
 
-	if (cache->trans_ctx != NULL) {
-		/* we've updated used_file_size, do nothing */
-		return TRUE;
-	}
-
 	/* only check the header if we're locked */
-	if (cache->locks == 0)
+	if (!cache->locked) /* unlocked: skip the checks below */
 		return TRUE;
 
 	if (hdr->used_file_size < sizeof(struct mail_cache_header)) {
@@ -211,15 +206,6 @@
 	}
 
 	if (cache->mmap_base != NULL) {
-		if (cache->locks != 0) {
-			/* in the middle of transaction - write the changes */
-			if (msync(cache->mmap_base, cache->mmap_length,
-				  MS_SYNC) < 0) {
-				mail_cache_set_syscall_error(cache, "msync()");
-				return -1;
-			}
-		}
-
 		if (munmap(cache->mmap_base, cache->mmap_length) < 0)
 			mail_cache_set_syscall_error(cache, "munmap()");
 	} else {
@@ -234,7 +220,7 @@
 	cache->hdr = NULL;
 	cache->mmap_length = 0;
 
-	cache->mmap_base = mmap_rw_file(cache->fd, &cache->mmap_length);
+	cache->mmap_base = mmap_ro_file(cache->fd, &cache->mmap_length);
 	if (cache->mmap_base == MAP_FAILED) {
 		cache->mmap_base = NULL;
 		mail_cache_set_syscall_error(cache, "mmap()");
@@ -290,8 +276,6 @@
 
 void mail_cache_free(struct mail_cache *cache)
 {
-	i_assert(cache->trans_ctx == NULL);
-
 	mail_cache_file_close(cache);
 
 	pool_unref(cache->split_header_pool);
@@ -307,12 +291,11 @@
 	cache->never_cache_fields = never_cache_fields;
 }
 
-int mail_cache_lock(struct mail_cache *cache, int nonblock)
+int mail_cache_lock(struct mail_cache *cache)
 {
 	int i, ret;
 
-	if (cache->locks != 0)
-		return 1;
+	i_assert(!cache->locked);
 
 	if (MAIL_CACHE_IS_UNUSABLE(cache))
 		return 0;
@@ -324,26 +307,14 @@
 	}
 
 	for (i = 0; i < 3; i++) {
-		if (nonblock) {
-			ret = file_try_lock(cache->fd, F_WRLCK);
-			if (ret < 0) {
-				mail_cache_set_syscall_error(cache,
-							     "file_try_lock()");
-			}
-		} else {
-			ret = file_wait_lock(cache->fd, F_WRLCK);
-			if (ret <= 0) {
-				mail_cache_set_syscall_error(cache,
-					"file_wait_lock()");
-			}
+		if ((ret = file_wait_lock(cache->fd, F_WRLCK)) <= 0) {
+			mail_cache_set_syscall_error(cache, "file_wait_lock()");
+			break;
 		}
-
-		if (ret <= 0)
-			break;
+		cache->locked = TRUE;
 
 		if (cache->hdr->file_seq == cache->index->hdr->cache_file_seq) {
 			/* got it */
-			cache->locks++;
 			break;
 		}
 
@@ -353,23 +324,52 @@
 			return ret;
 		ret = 0;
 	}
+
+	if (ret > 0)
+		cache->hdr_copy = *cache->hdr;
+
 	return ret;
 }
 
+/* Set need_compress from header stats; an empty index counts as 0% continued. */
+static void mail_cache_update_need_compress(struct mail_cache *cache)
+{
+	const struct mail_cache_header *hdr = cache->hdr;
+	unsigned int cont_percentage = 0;
+	uoff_t max_del_space;
+
+	if (cache->index->map->records_count != 0) /* avoid division by zero */
+		cont_percentage = hdr->continued_record_count * 100 /
+			cache->index->map->records_count;
+	if (cont_percentage >= COMPRESS_CONTINUED_PERCENTAGE &&
+	    hdr->used_file_size >= COMPRESS_MIN_SIZE)
+		cache->need_compress = TRUE; /* too many continued rows */
+
+	/* see if we've reached the max. deleted space in file */
+	max_del_space = hdr->used_file_size / 100 * COMPRESS_PERCENTAGE;
+	if (hdr->deleted_space >= max_del_space &&
+	    hdr->used_file_size >= COMPRESS_MIN_SIZE)
+		cache->need_compress = TRUE;
+}
+
 void mail_cache_unlock(struct mail_cache *cache)
 {
-	if (--cache->locks > 0)
-		return;
+	i_assert(cache->locked);
+
+	cache->locked = FALSE;
+
+	if (cache->hdr_modified) {
+		cache->hdr_modified = FALSE;
+		if (pwrite_full(cache->fd, &cache->hdr_copy,
+				sizeof(cache->hdr_copy), 0) < 0)
+			mail_cache_set_syscall_error(cache, "pwrite_full()");
+                mail_cache_update_need_compress(cache);
+	}
 
 	if (file_wait_lock(cache->fd, F_UNLCK) <= 0)
 		mail_cache_set_syscall_error(cache, "file_wait_lock(F_UNLCK)");
 }
 
-int mail_cache_is_locked(struct mail_cache *cache)
-{
-	return cache->locks > 0;
-}
-
 struct mail_cache_view *
 mail_cache_view_open(struct mail_cache *cache, struct mail_index_view *iview)
 {
--- a/src/lib-index/mail-cache.h	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-cache.h	Sun Jul 04 23:00:47 2004 +0300
@@ -83,30 +83,14 @@
 /* Compress cache file. */
 int mail_cache_compress(struct mail_cache *cache, struct mail_index_view *view);
 
-/* Explicitly lock the cache file. Returns -1 if error, 1 if ok, 0 if we
-   couldn't lock */
-int mail_cache_lock(struct mail_cache *cache, int nonblock);
-void mail_cache_unlock(struct mail_cache *cache);
-
-/* Returns TRUE if cache file is locked. */
-int mail_cache_is_locked(struct mail_cache *cache);
-
 struct mail_cache_view *
 mail_cache_view_open(struct mail_cache *cache, struct mail_index_view *iview);
 void mail_cache_view_close(struct mail_cache_view *view);
 
-/* Begin transaction. Cache transaction may be committed or rollbacked multiple
-   times. It will finish when index transaction is committed or rollbacked.
-   The transaction might also be partially committed automatically, so this
-   is kind of fake transaction, it's only purpose being optimizing writes.
-   Returns same as mail_cache_lock(). */
-int mail_cache_transaction_begin(struct mail_cache_view *view, int nonblock,
-				 struct mail_index_transaction *t,
-				 struct mail_cache_transaction_ctx **ctx_r);
-int mail_cache_transaction_commit(struct mail_cache_transaction_ctx *ctx);
-void mail_cache_transaction_rollback(struct mail_cache_transaction_ctx *ctx);
-
-void mail_cache_transaction_end(struct mail_cache_transaction_ctx *ctx);
+/* Get index transaction specific cache transaction. */
+struct mail_cache_transaction_ctx *
+mail_cache_get_transaction(struct mail_cache_view *view,
+			   struct mail_index_transaction *t);
 
 /* Return NULL-terminated list of headers for given index, or NULL if
    header index isn't used. */
@@ -117,14 +101,10 @@
 				 unsigned int idx, const char *const headers[]);
 
 /* Add new field to given record. Updates are not allowed. Fixed size fields
-   must be exactly the expected size and they're converted to network byte
-   order in disk. */
-int mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq,
-		   enum mail_cache_field field,
-		   const void *data, size_t data_size);
-
-/* Mark the given record deleted. */
-int mail_cache_delete(struct mail_cache_transaction_ctx *ctx, uint32_t seq);
+   must be exactly the expected size. */
+void mail_cache_add(struct mail_cache_transaction_ctx *ctx, uint32_t seq,
+		    enum mail_cache_field field,
+		    const void *data, size_t data_size);
 
 /* Return all fields that are currently cached for record. */
 enum mail_cache_field
--- a/src/lib-index/mail-index-private.h	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-index-private.h	Sun Jul 04 23:00:47 2004 +0300
@@ -133,7 +133,8 @@
 void mail_index_reset_cache(struct mail_index_transaction *t,
 			    uint32_t new_file_seq);
 void mail_index_update_cache(struct mail_index_transaction *t,
-			     uint32_t seq, uint32_t offset);
+			     uint32_t seq, uint32_t offset,
+			     uint32_t *old_offset_r);
 
 int mail_index_fix_header(struct mail_index *index, struct mail_index_map *map,
 			  struct mail_index_header *hdr, const char **error_r);
--- a/src/lib-index/mail-index-sync-update.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-index-sync-update.c	Sun Jul 04 23:00:47 2004 +0300
@@ -57,6 +57,18 @@
 		hdr->first_deleted_uid_lowwater = rec->uid;
 }
 
+/* Lock the cache if this sync hasn't yet, then call mail_cache_delete(). */
+static void mail_index_sync_cache_expunge(struct mail_index_sync_ctx *sync_ctx,
+					  uoff_t cache_offset)
+{
+	struct mail_cache *cache = sync_ctx->view->index->cache;
+
+	if (!sync_ctx->cache_locked && mail_cache_lock(cache) <= 0)
+		return;
+	sync_ctx->cache_locked = TRUE;
+	(void)mail_cache_delete(cache, cache_offset);
+}
+
 static int sync_expunge(const struct mail_transaction_expunge *e, void *context)
 {
         struct mail_index_sync_ctx *sync_ctx = context;
@@ -78,6 +90,11 @@
 	for (seq = seq1; seq <= seq2; seq++) {
                 rec = MAIL_INDEX_MAP_IDX(map, seq-1);
 		mail_index_header_update_counts(hdr, rec->flags, 0);
+		
+		if (rec->cache_offset != 0) {
+			mail_index_sync_cache_expunge(sync_ctx,
+						      rec->cache_offset);
+		}
 	}
 
 	/* @UNSAFE */
@@ -225,7 +242,7 @@
 	if (rec->cache_offset != 0) {
 		/* we'll need to link the old and new cache records */
 		if (!sync_ctx->cache_locked) {
-			if (mail_cache_lock(view->index->cache, FALSE) <= 0)
+			if (mail_cache_lock(view->index->cache) <= 0)
 				return -1;
 			sync_ctx->cache_locked = TRUE;
 		}
--- a/src/lib-index/mail-index-transaction-private.h	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-index-transaction-private.h	Sun Jul 04 23:00:47 2004 +0300
@@ -19,11 +19,13 @@
 
 	unsigned char hdr_change[sizeof(struct mail_index_header)];
 	unsigned char hdr_mask[sizeof(struct mail_index_header)];
-	uint32_t new_cache_file_seq;
 
 	buffer_t *extra_rec_updates[MAIL_INDEX_MAX_EXTRA_RECORDS];
 
+	uint32_t new_cache_file_seq;
 	buffer_t *cache_updates;
+        struct mail_cache_transaction_ctx *cache_trans_ctx;
+
 	unsigned int hide_transaction:1;
 	unsigned int hdr_changed:1;
 };
--- a/src/lib-index/mail-index-transaction.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-index/mail-index-transaction.c	Sun Jul 04 23:00:47 2004 +0300
@@ -8,6 +8,7 @@
 #include "buffer.h"
 #include "mail-index-view-private.h"
 #include "mail-transaction-log.h"
+#include "mail-cache-private.h"
 #include "mail-index-transaction-private.h"
 
 #include <stddef.h>
@@ -124,10 +125,15 @@
 	int ret;
 
 	if (mail_index_view_is_inconsistent(t->view)) {
-		mail_index_transaction_unref(t);
+		mail_index_transaction_rollback(t);
 		return -1;
 	}
 
+	if (t->cache_trans_ctx != NULL) {
+		mail_cache_transaction_commit(t->cache_trans_ctx);
+                t->cache_trans_ctx = NULL;
+	}
+
 	if (t->last_update.uid1 != 0)
 		mail_index_transaction_add_last(t);
 
@@ -144,6 +150,10 @@
 
 void mail_index_transaction_rollback(struct mail_index_transaction *t)
 {
+	if (t->cache_trans_ctx != NULL) {
+		mail_cache_transaction_rollback(t->cache_trans_ctx);
+                t->cache_trans_ctx = NULL;
+	}
         mail_index_transaction_unref(t);
 }
 
@@ -499,8 +509,9 @@
 		      &update, sizeof(update));
 }
 
-static void mail_index_update_seq_buffer(buffer_t **buffer, uint32_t seq,
-					 const void *record, size_t record_size)
+static int mail_index_update_seq_buffer(buffer_t **buffer, uint32_t seq,
+					const void *record, size_t record_size,
+					void *old_record)
 {
 	unsigned int idx, left_idx, right_idx;
 	void *data;
@@ -530,8 +541,12 @@
 				right_idx = idx;
 			else {
 				/* already there, update */
+				if (old_record != NULL) {
+					memcpy(old_record, seq_p+1,
+					       record_size);
+				}
 				memcpy(seq_p+1, record, record_size);
-				return;
+				return TRUE;
 			}
 		}
 	}
@@ -545,6 +560,7 @@
 
 	*seq_p = seq;
 	memcpy(seq_p+1, record, record_size);
+	return FALSE;
 }
 
 void mail_index_reset_cache(struct mail_index_transaction *t,
@@ -554,17 +570,21 @@
 }
 
 void mail_index_update_cache(struct mail_index_transaction *t,
-			     uint32_t seq, uint32_t offset)
+			     uint32_t seq, uint32_t offset,
+			     uint32_t *old_offset_r) /* may be NULL */
 {
 	struct mail_index_record *rec;
 
 	if (seq >= t->first_new_seq) {
 		/* just appended message, modify it directly */
 		rec = mail_index_transaction_lookup(t, seq);
+		if (old_offset_r != NULL) *old_offset_r = rec->cache_offset;
 		rec->cache_offset = offset;
 	} else {
-		mail_index_update_seq_buffer(&t->cache_updates, seq,
-					     &offset, sizeof(offset));
+		if (!mail_index_update_seq_buffer(&t->cache_updates, seq,
+				&offset, sizeof(offset), old_offset_r) &&
+		    old_offset_r != NULL)
+			*old_offset_r = 0;
 	}
 }
 
@@ -585,7 +605,7 @@
 		       data, index->extra_records[data_id].size);
 	} else {
 		mail_index_update_seq_buffer(&t->extra_rec_updates[data_id],
-			seq, data, index->extra_records[data_id].size);
+			seq, data, index->extra_records[data_id].size, NULL);
 	}
 }
 
--- a/src/lib-storage/index/index-mail-headers.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-storage/index/index-mail-headers.c	Sun Jul 04 23:00:47 2004 +0300
@@ -394,9 +394,7 @@
 	/* FIXME: add some smart checks here. we don't necessarily want to
 	   cache everything.. */
 
-	if (!index_mail_cache_transaction_begin(mail))
-		return FALSE;
-
+	index_mail_cache_transaction_begin(mail);
 	return TRUE;
 }
 
--- a/src/lib-storage/index/index-mail.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-storage/index/index-mail.c	Sun Jul 04 23:00:47 2004 +0300
@@ -123,26 +123,13 @@
 	}
 }
 
-int index_mail_cache_transaction_begin(struct index_mail *mail)
+void index_mail_cache_transaction_begin(struct index_mail *mail)
 {
-	if (mail->trans->cache_trans != NULL)
-		return TRUE;
-
-	if (mail->trans->cache_trans_failed) {
-		/* don't try more than once */
-		return FALSE;
+	if (mail->trans->cache_trans == NULL) {
+		mail->trans->cache_trans =
+			mail_cache_get_transaction(mail->trans->cache_view,
+						   mail->trans->trans);
 	}
-
-	if (mail_cache_transaction_begin(mail->trans->cache_view, TRUE,
-					 mail->trans->trans,
-					 &mail->trans->cache_trans) <= 0) {
-                mail->trans->cache_trans_failed = TRUE;
-		return FALSE;
-	}
-
-	mail->data.cached_fields =
-		mail_cache_get_fields(mail->trans->cache_view, mail->data.seq);
-	return TRUE;
 }
 
 static int index_mail_cache_can_add(struct index_mail *mail,
@@ -153,8 +140,7 @@
 
 	// FIXME: check if we really want to cache this
 
-	if (!index_mail_cache_transaction_begin(mail))
-		return FALSE;
+	index_mail_cache_transaction_begin(mail);
 
 	/* cached_fields may have changed, recheck */
 	if ((mail->data.cached_fields & field) != 0)
@@ -169,9 +155,8 @@
         if (!index_mail_cache_can_add(mail, field))
 		return;
 
-	if (mail_cache_add(mail->trans->cache_trans, mail->data.seq,
-			   field, data, size) < 0)
-		mail_cache_transaction_rollback(mail->trans->cache_trans);
+	mail_cache_add(mail->trans->cache_trans, mail->data.seq,
+		       field, data, size);
 
 	mail->data.cached_fields |= field;
 }
@@ -359,8 +344,7 @@
 		mail->mail.has_no_nuls = TRUE;
 	}
 
-	if (!index_mail_cache_transaction_begin(mail))
-		return;
+	index_mail_cache_transaction_begin(mail);
 
 	/* update cache_flags */
 	cache_flags = mail_cache_get_record_flags(mail->trans->cache_view,
--- a/src/lib-storage/index/index-mail.h	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-storage/index/index-mail.h	Sun Jul 04 23:00:47 2004 +0300
@@ -79,7 +79,7 @@
 			    struct message_header_line *hdr,
 			    struct index_mail *mail);
 
-int index_mail_cache_transaction_begin(struct index_mail *mail);
+void index_mail_cache_transaction_begin(struct index_mail *mail);
 void index_mail_cache_add(struct index_mail *mail, enum mail_cache_field field,
 			  const void *data, size_t size);
 
--- a/src/lib-storage/index/index-transaction.c	Sun Jul 04 17:26:22 2004 +0300
+++ b/src/lib-storage/index/index-transaction.c	Sun Jul 04 23:00:47 2004 +0300
@@ -15,9 +15,6 @@
 
 static void index_transaction_free(struct index_transaction_context *t)
 {
-	if (t->cache_trans != NULL)
-		mail_cache_transaction_end(t->cache_trans);
-
 	mail_cache_view_close(t->cache_view);
 	mail_index_view_close(t->trans_view);
 	mail_index_view_unlock(t->ibox->view);
@@ -35,9 +32,6 @@
 	uoff_t offset;
 	int ret;
 
-	if (t->cache_trans != NULL)
-		(void)mail_cache_transaction_commit(t->cache_trans);
-
 	ret = mail_index_transaction_commit(t->trans, &seq, &offset);
 	if (ret < 0)
 		mail_storage_set_index_error(t->ibox);