changeset 6296:205ee38f10d1 HEAD

Drop fields that haven't been used for 30 days when compressing.
author Timo Sirainen <tss@iki.fi>
date Mon, 13 Aug 2007 20:16:55 +0300
parents 30904b20782d
children 3f6fadbe6888
files src/lib-index/mail-cache-compress.c src/lib-index/mail-cache-fields.c src/lib-index/mail-cache-private.h src/lib-index/mail-cache-transaction.c
diffstat 4 files changed, 97 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-index/mail-cache-compress.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-compress.c	Mon Aug 13 20:16:55 2007 +0300
@@ -18,6 +18,8 @@
 
 	buffer_t *buffer, *field_seen;
 	ARRAY_DEFINE(bitmask_pos, unsigned int);
+	uint32_t *field_file_map;
+
 	uint8_t field_seen_value;
 	bool new_msg;
 };
@@ -44,15 +46,19 @@
 mail_cache_compress_field(struct mail_cache_copy_context *ctx,
 			  const struct mail_cache_iterate_field *field)
 {
-	uint32_t field_idx = field->field_idx;
         struct mail_cache_field *cache_field;
 	enum mail_cache_decision_type dec;
+	uint32_t file_field_idx, size32;
 	uint8_t *field_seen;
-	uint32_t size32;
 
-	cache_field = &ctx->cache->fields[field_idx].field;
+	file_field_idx = ctx->field_file_map[field->field_idx];
+	if (file_field_idx == (uint32_t)-1)
+		return;
 
-	field_seen = buffer_get_space_unsafe(ctx->field_seen, field_idx, 1);
+	cache_field = &ctx->cache->fields[field->field_idx].field;
+
+	field_seen = buffer_get_space_unsafe(ctx->field_seen,
+					     field->field_idx, 1);
 	if (*field_seen == ctx->field_seen_value) {
 		/* duplicate */
 		if (cache_field->type == MAIL_CACHE_FIELD_BITMASK)
@@ -70,7 +76,7 @@
 			return;
 	}
 
-	buffer_append(ctx->buffer, &field_idx, sizeof(field_idx));
+	buffer_append(ctx->buffer, &file_field_idx, sizeof(file_field_idx));
 
 	if (cache_field->field_size == (unsigned int)-1) {
 		size32 = (uint32_t)field->size;
@@ -103,6 +109,38 @@
 	return file_seq != 0 ? file_seq : 1;
 }
 
+static void
+mail_cache_compress_get_fields(struct mail_cache_copy_context *ctx,
+			       unsigned int used_fields_count)
+{
+	struct mail_cache *cache = ctx->cache;
+	unsigned int i, j, idx;
+
+	/* Copy our mapping into the cache, so mail_cache_header_fields_get()
+	   returns the fields in the same order as we saved them. */
+	memcpy(cache->field_file_map, ctx->field_file_map,
+	       sizeof(uint32_t) * cache->fields_count);
+
+	/* reverse mapping: file field index -> internal field index */
+	cache->file_fields_count = used_fields_count;
+	i_free(cache->file_field_map);
+	cache->file_field_map = used_fields_count == 0 ? NULL :
+		i_new(unsigned int, used_fields_count);
+	for (i = j = 0; i < cache->fields_count; i++) {
+		idx = cache->field_file_map[i];
+		if (idx != (uint32_t)-1) { /* (uint32_t)-1 = field was dropped */
+			i_assert(idx < used_fields_count &&
+				 cache->file_field_map[idx] == 0); /* presumably relies on i_new() zero-filling - TODO confirm */
+			cache->file_field_map[idx] = i;
+			j++; /* counts the fields that got a file index */
+		}
+	}
+	i_assert(j == used_fields_count);
+
+	buffer_set_used_size(ctx->buffer, 0); /* reuse ctx->buffer for the fields header */
+	mail_cache_header_fields_get(cache, ctx->buffer);
+}
+
 static int
 mail_cache_copy(struct mail_cache *cache, struct mail_index_transaction *trans,
 		int fd, uint32_t *file_seq_r,
@@ -117,8 +155,9 @@
 	struct mail_cache_header hdr;
 	struct mail_cache_record cache_rec;
 	struct ostream *output;
-	buffer_t *buffer;
-	uint32_t i, message_count, seq, first_new_seq, ext_offset;
+	uint32_t message_count, seq, first_new_seq, ext_offset;
+	unsigned int i, used_fields_count, orig_fields_count;
+	time_t max_drop_time;
 
 	view = mail_index_transaction_get_view(trans);
 
@@ -151,8 +190,21 @@
 	ctx.buffer = buffer_create_dynamic(default_pool, 4096);
 	ctx.field_seen = buffer_create_dynamic(default_pool, 64);
 	ctx.field_seen_value = 0;
+	ctx.field_file_map = t_new(uint32_t, cache->fields_count);
 	t_array_init(&ctx.bitmask_pos, 32);
 
+	/* @UNSAFE: drop unused fields and create a field mapping for
+	   used fields */
+	max_drop_time = idx_hdr->day_stamp - MAIL_CACHE_FIELD_DROP_SECS;
+	orig_fields_count = cache->fields_count;
+	for (i = used_fields_count = 0; i < orig_fields_count; i++) {
+		if (cache->fields[i].last_used < max_drop_time)
+			cache->fields[i].used = FALSE;
+
+		ctx.field_file_map[i] = !cache->fields[i].used ? (uint32_t)-1 :
+			used_fields_count++;
+	}
+
 	t_array_init(ext_offsets, message_count);
 	for (seq = 1; seq <= message_count; seq++) {
 		if (mail_index_transaction_is_expunged(trans, seq)) {
@@ -190,25 +242,11 @@
 		array_append(ext_offsets, &ext_offset, 1);
 	}
 	i_assert(array_count(ext_offsets) == message_count);
-
-	if (cache->fields_count != 0) {
-		hdr.field_header_offset =
-			mail_index_uint32_to_offset(output->offset);
+	i_assert(orig_fields_count == cache->fields_count);
 
-		/* we wrote everything using our internal field ids. so we want
-		   mail_cache_header_fields_get() to use them and ignore any
-		   existing id mappings in the old cache file. */
-		cache->file_fields_count = 0;
-		for (i = 0; i < cache->fields_count; i++)
-                        cache->field_file_map[i] = (uint32_t)-1;
-
-		t_push();
-		buffer = buffer_create_dynamic(pool_datastack_create(), 256);
-		mail_cache_header_fields_get(cache, buffer);
-		o_stream_send(output, buffer_get_data(buffer, NULL),
-			      buffer_get_used_size(buffer));
-		t_pop();
-	}
+	hdr.field_header_offset = mail_index_uint32_to_offset(output->offset);
+	mail_cache_compress_get_fields(&ctx, used_fields_count);
+	o_stream_send(output, ctx.buffer->data, ctx.buffer->used);
 
 	hdr.used_file_size = output->offset;
 	buffer_free(ctx.buffer);
--- a/src/lib-index/mail-cache-fields.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-fields.c	Mon Aug 13 20:16:55 2007 +0300
@@ -211,6 +211,7 @@
 	const char *p, *names, *end;
 	void *orig_key, *orig_value;
 	unsigned int new_fields_count;
+	time_t max_drop_time;
 	uint32_t offset, i;
 
 	if (mail_cache_header_fields_get_offset(cache, &offset) < 0)
@@ -275,6 +276,9 @@
 	for (i = 0; i < cache->fields_count; i++)
 		cache->field_file_map[i] = (uint32_t)-1;
 
+	max_drop_time = cache->index->map->hdr.day_stamp -
+		MAIL_CACHE_FIELD_DROP_SECS;
+
 	memset(&field, 0, sizeof(field));
 	for (i = 0; i < field_hdr->fields_count; i++) {
 		for (p = names; p != end && *p != '\0'; p++) ;
@@ -317,6 +321,8 @@
 				"Duplicated field in header: %s", names);
 			return -1;
 		}
+		cache->fields[field.idx].used = TRUE;
+
 		cache->field_file_map[field.idx] = i;
 		cache->file_field_map[i] = field.idx;
 
@@ -324,6 +330,11 @@
 		if (last_used[i] > cache->fields[field.idx].last_used)
 			cache->fields[field.idx].last_used = last_used[i];
 
+		if (cache->fields[field.idx].last_used < max_drop_time) {
+			/* time to drop this field */
+			cache->need_compress_file_seq = cache->hdr->file_seq;
+		}
+
                 names = p + 1;
 	}
 	return 0;
@@ -341,7 +352,8 @@
 		buffer_append(dest, data, size);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
 		data = CONST_PTR_OFFSET(&cache->fields[i], offset);
 		buffer_append(dest, data, size);
@@ -362,7 +374,8 @@
 		buffer_append(dest, &byte, 1);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
 		data = CONST_PTR_OFFSET(&cache->fields[i], offset);
 		byte = (uint8_t)*data;
@@ -435,7 +448,12 @@
 	uint32_t i;
 
 	memset(&hdr, 0, sizeof(hdr));
-	hdr.fields_count = cache->fields_count;
+	hdr.fields_count = cache->file_fields_count;
+	for (i = 0; i < cache->fields_count; i++) {
+		if (cache->field_file_map[i] == (uint32_t)-1 &&
+		    cache->fields[i].used)
+			hdr.fields_count++;
+	}
 	buffer_append(dest, &hdr, sizeof(hdr));
 
 	/* we have to keep the field order for the existing fields. */
@@ -451,14 +469,17 @@
 	i_assert(buffer_get_used_size(dest) == sizeof(hdr) +
 		 (sizeof(uint32_t)*2 + 2) * hdr.fields_count);
 
+	/* add fields' names */
 	for (i = 0; i < cache->file_fields_count; i++) {
 		field = cache->file_field_map[i];
 		name = cache->fields[field].field.name;
 		buffer_append(dest, name, strlen(name)+1);
 	}
 	for (i = 0; i < cache->fields_count; i++) {
-		if (cache->field_file_map[i] != (uint32_t)-1)
+		if (cache->field_file_map[i] != (uint32_t)-1 ||
+		    !cache->fields[i].used)
 			continue;
+
 		name = cache->fields[i].field.name;
 		buffer_append(dest, name, strlen(name)+1);
 	}
--- a/src/lib-index/mail-cache-private.h	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-private.h	Mon Aug 13 20:16:55 2007 +0300
@@ -7,6 +7,9 @@
 
 #define MAIL_CACHE_VERSION 1
 
+/* Drop fields that haven't been used for this many seconds (30 days) */
+#define MAIL_CACHE_FIELD_DROP_SECS (3600*24*30)
+
 /* Never compress the file if it's smaller than this */
 #define MAIL_CACHE_COMPRESS_MIN_SIZE (1024*50)
 
@@ -112,6 +115,8 @@
 	uint32_t uid_highwater;
 	uint32_t last_used;
 
+	/* Unused fields aren't written to cache file */
+	unsigned int used:1;
 	unsigned int decision_dirty:1;
 };
 
--- a/src/lib-index/mail-cache-transaction.c	Mon Aug 13 20:16:07 2007 +0300
+++ b/src/lib-index/mail-cache-transaction.c	Mon Aug 13 20:16:55 2007 +0300
@@ -1,6 +1,7 @@
 /* Copyright (C) 2003-2004 Timo Sirainen */
 
 #include "lib.h"
+#include "ioloop.h"
 #include "array.h"
 #include "buffer.h"
 #include "file-cache.h"
@@ -672,6 +673,9 @@
 	uint32_t offset, hdr_offset;
 	int ret = 0;
 
+	ctx->cache->fields[field_idx].last_used = ioloop_time;
+	ctx->cache->fields[field_idx].used = TRUE;
+
 	if ((ret = mail_cache_transaction_lock(ctx)) <= 0) {
 		/* create the cache file if it doesn't exist yet */
 		if (ctx->tried_compression)