changeset 4303:5f03738219a6 HEAD

Changed mail-storage API to do the mail sorting internally. Optimized it internally to keep a 32bit sort_id field in index for each used primary sort condition. Practically this should mean less disk I/O, memory and CPU usage when SORT command is used.
author Timo Sirainen <timo.sirainen@movial.fi>
date Thu, 08 Jun 2006 15:49:31 +0300
parents a9498883f44c
children 3bb4f35b99d4
files src/imap/cmd-sort.c src/imap/imap-sort.c src/lib-storage/index/Makefile.am src/lib-storage/index/dbox/dbox-storage.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-search.c src/lib-storage/index/index-sort.c src/lib-storage/index/index-sort.h src/lib-storage/index/index-storage.h src/lib-storage/index/maildir/maildir-storage.c src/lib-storage/index/mbox/mbox-storage.c src/lib-storage/mail-storage-private.h src/lib-storage/mail-storage.c src/lib-storage/mail-storage.h
diffstat 14 files changed, 909 insertions(+), 739 deletions(-) [+]
line wrap: on
line diff
--- a/src/imap/cmd-sort.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/imap/cmd-sort.c	Thu Jun 08 15:49:31 2006 +0300
@@ -20,28 +20,32 @@
 	{ MAIL_SORT_SUBJECT,	"subject" },
 	{ MAIL_SORT_TO,		"to" },
 
-	{ MAIL_SORT_REVERSE,	"reverse" },
 	{ MAIL_SORT_END,	NULL }
 };
 
-static enum mail_sort_type *
-get_sort_program(struct client_command_context *cmd, struct imap_arg *args)
+static int
+get_sort_program(struct client_command_context *cmd, struct imap_arg *args,
+		 enum mail_sort_type program[MAX_SORT_PROGRAM_SIZE])
 {
-	enum mail_sort_type type;
-	buffer_t *buf;
-	int i;
+	enum mail_sort_type mask = 0;
+	unsigned int i, pos;
+	bool reverse;
 
 	if (args->type == IMAP_ARG_EOL) {
 		/* empyty list */
 		client_send_command_error(cmd, "Empty sort program.");
-		return NULL;
+		return -1;
 	}
 
-	buf = buffer_create_dynamic(pool_datastack_create(),
-				    32 * sizeof(enum mail_sort_type));
+	pos = 0; reverse = FALSE;
+	for (; args->type == IMAP_ARG_ATOM || args->type == IMAP_ARG_STRING;
+	     args++) {
+		const char *arg = IMAP_ARG_STR(args);
 
-	while (args->type == IMAP_ARG_ATOM || args->type == IMAP_ARG_STRING) {
-		const char *arg = IMAP_ARG_STR(args);
+		if (strcasecmp(arg, "reverse") == 0) {
+			reverse = !reverse;
+			continue;
+		}
 
 		for (i = 0; sort_names[i].type != MAIL_SORT_END; i++) {
 			if (strcasecmp(arg, sort_names[i].name) == 0)
@@ -51,31 +55,37 @@
 		if (sort_names[i].type == MAIL_SORT_END) {
 			client_send_command_error(cmd, t_strconcat(
 				"Unknown sort argument: ", arg, NULL));
-			return NULL;
+			return -1;
 		}
 
-		buffer_append(buf, &sort_names[i].type,
-			      sizeof(enum mail_sort_type));
-		args++;
+		if ((mask & sort_names[i].type) != 0)
+			continue;
+		mask |= sort_names[i].type;
+
+		/* @UNSAFE: mask check should prevent us from ever
+		   overflowing */
+		i_assert(pos < MAX_SORT_PROGRAM_SIZE-1);
+		program[pos++] = sort_names[i].type |
+			(reverse ? MAIL_SORT_FLAG_REVERSE : 0);
+		reverse = FALSE;
 	}
 
-	type = MAIL_SORT_END;
-	buffer_append(buf, &type, sizeof(type));
+	program[pos++] = MAIL_SORT_END;
 
 	if (args->type != IMAP_ARG_EOL) {
 		client_send_command_error(cmd,
 					  "Invalid sort list argument.");
-		return NULL;
+		return -1;
 	}
 
-	return buffer_free_without_data(buf);
+	return 0;
 }
 
 bool cmd_sort(struct client_command_context *cmd)
 {
 	struct client *client = cmd->client;
 	struct mail_search_arg *sargs;
-	enum mail_sort_type *sorting;
+	enum mail_sort_type sorting[MAX_SORT_PROGRAM_SIZE];
 	struct imap_arg *args;
 	int args_count;
 	pool_t pool;
@@ -100,8 +110,7 @@
 		return TRUE;
 	}
 
-	sorting = get_sort_program(cmd, IMAP_ARG_LIST(args)->args);
-	if (sorting == NULL)
+	if (get_sort_program(cmd, IMAP_ARG_LIST(args)->args, sorting) < 0)
 		return TRUE;
 	args++;
 
--- a/src/imap/imap-sort.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/imap/imap-sort.c	Thu Jun 08 15:49:31 2006 +0300
@@ -6,7 +6,7 @@
    reasonably fast. */
 
 #include "common.h"
-#include "buffer.h"
+#include "array.h"
 #include "hash.h"
 #include "ostream.h"
 #include "str.h"
@@ -28,701 +28,87 @@
 	((type) == MAIL_SORT_ARRIVAL || (type) == MAIL_SORT_DATE)
 
 struct sort_context {
-	struct mail_search_context *search_ctx;
-	struct mailbox_transaction_context *t;
-	struct mail *other_mail;
-
 	enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE];
-	enum mail_sort_type common_mask, cache_mask;
 
 	struct mailbox *box;
 	struct ostream *output;
 	string_t *str;
 
-	buffer_t *sort_buffer;
-	size_t sort_element_size;
-
-	pool_t str_pool;
-	struct hash_table *string_table;
-
-	time_t last_arrival, last_date;
-	uoff_t last_size;
-	char *last_cc, *last_from, *last_subject, *last_to;
-
-	bool written, id_is_uid;
+	bool written;
 };
 
-static void mail_sort_input(struct sort_context *ctx, struct mail *mail);
-static void mail_sort_flush(struct sort_context *ctx);
-
-static void
-mail_sort_normalize(const enum mail_sort_type *input, buffer_t *output)
-{
-        enum mail_sort_type type, mask = 0;
-	bool reverse;
-
-	reverse = FALSE;
-	for (; *input != MAIL_SORT_END; input++) {
-		if (*input == MAIL_SORT_REVERSE)
-			reverse = !reverse;
-		else {
-			if ((mask & *input) == 0) {
-				if (reverse) {
-					type = MAIL_SORT_REVERSE;
-					buffer_append(output,
-						      &type, sizeof(type));
-				}
-
-				buffer_append(output, input, sizeof(*input));
-				mask |= *input;
-			}
-
-			reverse = FALSE;
-		}
-	}
-
-	type = MAIL_SORT_END;
-	buffer_append(output, &type, sizeof(type));
-}
-
-static enum mail_sort_type
-mail_sort_get_common_mask(const enum mail_sort_type *sort1,
-			  const enum mail_sort_type *sort2,
-			  unsigned int *count)
-{
-	enum mail_sort_type mask = 0;
-
-	*count = 0;
-	while (*sort1 == *sort2 && *sort1 != MAIL_SORT_END) {
-		if (*sort1 != MAIL_SORT_REVERSE)
-			mask |= *sort1;
-		sort1++; sort2++; (*count)++;
-	}
-
-	return mask;
-}
-
-static enum mail_fetch_field
-init_sort_elements(struct sort_context *ctx,
-		   const char *wanted_headers[MAX_WANTED_HEADERS])
-{
-	unsigned int i;
-        enum mail_fetch_field fields;
-
-	/* figure out what data we'd like to cache */
-	ctx->sort_element_size = sizeof(unsigned int);
-	ctx->cache_mask = 0;
-
-	for (i = 0; ctx->sort_program[i] != MAIL_SORT_END; i++) {
-		enum mail_sort_type type = ctx->sort_program[i];
-
-		if (IS_SORT_STRING(type)) {
-			ctx->sort_element_size += sizeof(const char *);
-
-			/* cache the second rule as well, if available */
-			if (ctx->cache_mask != 0) {
-				ctx->cache_mask |= type;
-				break;
-			}
-			ctx->cache_mask |= type;
-		} else if (IS_SORT_TIME(type)) {
-			ctx->sort_element_size += sizeof(time_t);
-			ctx->cache_mask |= type;
-			break;
-		} else if (type == MAIL_SORT_SIZE) {
-			ctx->sort_element_size += sizeof(uoff_t);
-			ctx->cache_mask |= type;
-			break;
-		}
-	}
-
-	fields = 0;
-	if (ctx->cache_mask & MAIL_SORT_ARRIVAL)
-		fields |= MAIL_FETCH_RECEIVED_DATE;
-	if (ctx->cache_mask & MAIL_SORT_DATE)
-		fields |= MAIL_FETCH_DATE;
-	if (ctx->cache_mask & MAIL_SORT_SIZE)
-		fields |= MAIL_FETCH_VIRTUAL_SIZE;
-
-	/* @UNSAFE */
-	i_assert(MAX_WANTED_HEADERS > 4);
-	i = 0;
-	if (ctx->cache_mask & MAIL_SORT_CC)
-		wanted_headers[i++] = "cc";
-	if (ctx->cache_mask & MAIL_SORT_FROM)
-		wanted_headers[i++] = "from";
-	if (ctx->cache_mask & MAIL_SORT_TO)
-		wanted_headers[i++] = "to";
-	if (ctx->cache_mask & MAIL_SORT_SUBJECT)
-		wanted_headers[i++] = "subject";
-	wanted_headers[i] = NULL;
-
-	if ((ctx->cache_mask & MAIL_SORT_CC) ||
-	    (ctx->cache_mask & MAIL_SORT_FROM) ||
-	    (ctx->cache_mask & MAIL_SORT_TO) ||
-	    (ctx->cache_mask & MAIL_SORT_SUBJECT)) {
-		ctx->str_pool = pool_alloconly_create("sort str", 8192);
-		ctx->string_table = hash_create(default_pool, ctx->str_pool,
-						0, str_hash,
-						(hash_cmp_callback_t *)strcmp);
-	}
-
-	return fields;
-}
-
-static void mail_sort_deinit(struct sort_context *ctx)
-{
-	mail_sort_flush(ctx);
-
-	if (ctx->string_table != NULL)
-		hash_destroy(ctx->string_table);
-	if (ctx->str_pool != NULL)
-		pool_unref(ctx->str_pool);
-	buffer_free(ctx->sort_buffer);
-
-	i_free(ctx->last_cc);
-	i_free(ctx->last_from);
-	i_free(ctx->last_subject);
-	i_free(ctx->last_to);
-}
-
 int imap_sort(struct client_command_context *cmd, const char *charset,
 	      struct mail_search_arg *args,
 	      const enum mail_sort_type *sort_program)
 {
 	struct client *client = cmd->client;
-	enum mail_sort_type norm_prog[MAX_SORT_PROGRAM_SIZE];
-        enum mail_fetch_field wanted_fields;
-	const char *wanted_headers[MAX_WANTED_HEADERS];
+	const char *wanted_headers[2];
+	enum mail_fetch_field wanted_fields;
+	struct mail_search_context *search_ctx;
+	struct mailbox_transaction_context *t;
 	struct mailbox_header_lookup_ctx *headers_ctx;
-	struct sort_context *ctx;
 	struct mail *mail;
-	buffer_t *buf;
-	unsigned int count;
+	string_t *str;
+	bool written = FALSE;
 	int ret;
 
-	ctx = t_new(struct sort_context, 1);
-
-	/* normalize sorting program. note that although we're using a hard
-	   buffer size here, it shouldn't be possible to overflow it since
-	   the normalized sort program can't exceed MAX_SORT_PROGRAM_SIZE. */
-	buf = buffer_create_data(pool_datastack_create(),
-				 norm_prog, sizeof(norm_prog));
-	mail_sort_normalize(sort_program, buf);
-	memcpy(ctx->sort_program, norm_prog, sizeof(ctx->sort_program));
-
-	/* remove the common part from sort program, we already know input is
-	   sorted that much so we don't have to worry about it. */
-	if (mailbox_search_get_sorting(client->mailbox, norm_prog) < 0)
-		return -1;
-	ctx->common_mask = mail_sort_get_common_mask(ctx->sort_program,
-						     norm_prog, &count);
-	if (count > 0) {
-		memmove(ctx->sort_program, ctx->sort_program + count,
-			sizeof(ctx->sort_program) -
-			sizeof(ctx->sort_program[0]) * count);
+	wanted_fields = 0;
+	wanted_headers[0] = wanted_headers[1] = NULL;
+	switch (*sort_program & MAIL_SORT_MASK) {
+	case MAIL_SORT_ARRIVAL:
+		wanted_fields = MAIL_FETCH_RECEIVED_DATE;
+		break;
+	case MAIL_SORT_CC:
+		wanted_headers[0] = "Cc";
+		break;
+	case MAIL_SORT_DATE:
+		wanted_fields = MAIL_FETCH_DATE;
+		break;
+	case MAIL_SORT_FROM:
+		wanted_headers[0] = "From";
+		break;
+	case MAIL_SORT_SIZE:
+		wanted_fields = MAIL_FETCH_VIRTUAL_SIZE;
+		break;
+	case MAIL_SORT_SUBJECT:
+		wanted_headers[0] = "Subject";
+		break;
+	case MAIL_SORT_TO:
+		wanted_headers[0] = "To";
+		break;
 	}
 
-	memset(wanted_headers, 0, sizeof(wanted_headers));
-	wanted_fields = init_sort_elements(ctx, wanted_headers);
-	headers_ctx = *wanted_headers == NULL ? NULL :
+	headers_ctx = wanted_headers[0] == NULL ? NULL :
 		mailbox_header_lookup_init(client->mailbox, wanted_headers);
 
-	/* initialize searching */
-	ctx->t = mailbox_transaction_begin(client->mailbox, 0);
-	ctx->search_ctx = mailbox_search_init(ctx->t, charset, args, norm_prog);
-	ctx->other_mail = mail_alloc(ctx->t, wanted_fields, headers_ctx);
+	t = mailbox_transaction_begin(client->mailbox, 0);
+	search_ctx = mailbox_search_init(t, charset, args, sort_program);
 
-	ctx->box = client->mailbox;
-	ctx->output = client->output;
-	ctx->sort_buffer = buffer_create_dynamic(system_pool,
-						 128 * ctx->sort_element_size);
-
-	ctx->str = t_str_new(STRBUF_SIZE);
-	str_append(ctx->str, "* SORT");
+	str = t_str_new(STRBUF_SIZE);
+	str_append(str, "* SORT");
 
-        ctx->id_is_uid = cmd->uid;
-
-	mail = mail_alloc(ctx->t, wanted_fields, headers_ctx);
-	while (mailbox_search_next(ctx->search_ctx, mail) > 0)
-		mail_sort_input(ctx, mail);
+	mail = mail_alloc(t, wanted_fields, headers_ctx);
+	while (mailbox_search_next(search_ctx, mail) > 0) {
+		if (str_len(str) >= STRBUF_SIZE-MAX_INT_STRLEN) {
+			o_stream_send(client->output, str_data(str),
+				      str_len(str));
+			str_truncate(str, 0);
+			written = TRUE;
+		}
+		str_printfa(str, " %u", cmd->uid ? mail->uid : mail->seq);
+	}
+	ret = mailbox_search_deinit(&search_ctx);
+	mail_free(&mail);
 
-	mail_sort_flush(ctx);
-	ret = mailbox_search_deinit(&ctx->search_ctx);
-
-	mail_free(&mail);
-	mail_free(&ctx->other_mail);
-
-	if (mailbox_transaction_commit(&ctx->t, 0) < 0)
+	if (mailbox_transaction_commit(&t, 0) < 0)
 		ret = -1;
 
-	if (ctx->written || ret == 0) {
-		str_append(ctx->str, "\r\n");
-		o_stream_send(client->output, str_data(ctx->str),
-			      str_len(ctx->str));
+	if (written || ret == 0) {
+		str_append(str, "\r\n");
+		o_stream_send(client->output, str_data(str), str_len(str));
 	}
 
 	if (headers_ctx != NULL)
 		mailbox_header_lookup_deinit(&headers_ctx);
-        mail_sort_deinit(ctx);
 	return ret;
 }
-
-static const char *string_table_get(struct sort_context *ctx, const char *str)
-{
-	char *value;
-
-	if (str == NULL)
-		return NULL;
-	if (*str == '\0')
-		return "";
-
-	value = hash_lookup(ctx->string_table, str);
-	if (value == NULL) {
-		value = p_strdup(ctx->str_pool, str);
-		hash_insert(ctx->string_table, value, value);
-	}
-
-	return value;
-}
-
-static const char *get_first_mailbox(struct mail *mail, const char *field)
-{
-	struct message_address *addr;
-	const char *str;
-
-	str = mail_get_first_header(mail, field);
-	if (str == NULL)
-		return NULL;
-
-	addr = message_address_parse(pool_datastack_create(),
-				     (const unsigned char *)str,
-				     strlen(str), 1);
-	return addr != NULL ? addr->mailbox : NULL;
-}
-
-static void mail_sort_check_flush(struct sort_context *ctx, struct mail *mail)
-{
-	const char *str;
-	time_t t;
-	uoff_t size;
-	bool changed = FALSE;
-
-	if (ctx->common_mask & MAIL_SORT_ARRIVAL) {
-		t = mail_get_received_date(mail);
-		if (t != ctx->last_arrival) {
-			ctx->last_arrival = t;
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_CC) {
-		str = get_first_mailbox(mail, "cc");
-		if (str != NULL)
-			str = t_str_ucase(str);
-
-		if (null_strcmp(str, ctx->last_cc) != 0) {
-			i_free(ctx->last_cc);
-			ctx->last_cc = i_strdup(str);
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_DATE) {
-		t = mail_get_date(mail, NULL);
-		if (t != ctx->last_date) {
-			ctx->last_date = t;
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_FROM) {
-		str = get_first_mailbox(mail, "from");
-		if (str != NULL)
-			str = t_str_ucase(str);
-
-		if (null_strcmp(str, ctx->last_from) != 0) {
-			i_free(ctx->last_from);
-			ctx->last_from = i_strdup(str);
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_SIZE) {
-		size = mail_get_virtual_size(mail);
-		if (size != ctx->last_size) {
-			ctx->last_size = size;
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_SUBJECT) {
-		str = mail_get_first_header(mail, "subject");
-		if (str != NULL) {
-			str = imap_get_base_subject_cased(
-				pool_datastack_create(), str, NULL);
-		}
-
-		if (null_strcmp(str, ctx->last_subject) != 0) {
-			i_free(ctx->last_subject);
-			ctx->last_subject = i_strdup(str);
-			changed = TRUE;
-		}
-	}
-
-	if (ctx->common_mask & MAIL_SORT_TO) {
-		str = get_first_mailbox(mail, "to");
-		if (str != NULL)
-			str = t_str_ucase(str);
-
-		if (null_strcmp(str, ctx->last_to) != 0) {
-			i_free(ctx->last_to);
-			ctx->last_to = i_strdup(str);
-			changed = TRUE;
-		}
-	}
-
-	if (changed)
-		mail_sort_flush(ctx);
-}
-
-static void mail_sort_input(struct sort_context *ctx, struct mail *mail)
-{
-	/* @UNSAFE */
-	unsigned char *buf;
-	unsigned int id;
-	time_t t;
-	uoff_t size;
-	const char *str;
-	size_t pos;
-
-	t_push();
-	if (ctx->common_mask != 0)
-		mail_sort_check_flush(ctx, mail);
-
-	buf = buffer_append_space_unsafe(ctx->sort_buffer,
-					 ctx->sort_element_size);
-	id = ctx->id_is_uid ? mail->uid : mail->seq;
-	memcpy(buf, &id, sizeof(id)); pos = sizeof(id);
-
-	if (ctx->cache_mask & MAIL_SORT_ARRIVAL) {
-		if (ctx->common_mask & MAIL_SORT_ARRIVAL)
-			t = ctx->last_arrival;
-		else
-			t = mail_get_received_date(mail);
-		memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_DATE) {
-		if (ctx->common_mask & MAIL_SORT_DATE)
-			t = ctx->last_date;
-		else
-			t = mail_get_date(mail, NULL);
-		memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_SIZE) {
-		if (ctx->common_mask & MAIL_SORT_SIZE)
-			size = ctx->last_size;
-		else
-			size = mail_get_virtual_size(mail);
-
-		memcpy(buf + pos, &size, sizeof(size)); pos += sizeof(size);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_CC) {
-		if (ctx->common_mask & MAIL_SORT_CC)
-			str = ctx->last_cc;
-		else {
-			str = get_first_mailbox(mail, "cc");
-			if (str != NULL)
-				str = t_str_ucase(str);
-		}
-		str = string_table_get(ctx, str);
-
-		memcpy(buf + pos, &str, sizeof(const char *));
-		pos += sizeof(const char *);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_FROM) {
-		if (ctx->common_mask & MAIL_SORT_FROM)
-			str = ctx->last_from;
-		else {
-			str = get_first_mailbox(mail, "from");
-			if (str != NULL)
-				str = t_str_ucase(str);
-		}
-		str = string_table_get(ctx, str);
-
-		memcpy(buf + pos, &str, sizeof(const char *));
-		pos += sizeof(const char *);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_TO) {
-		if (ctx->common_mask & MAIL_SORT_TO)
-			str = ctx->last_to;
-		else {
-			str = get_first_mailbox(mail, "to");
-			if (str != NULL)
-				str = t_str_ucase(str);
-		}
-		str = string_table_get(ctx, str);
-
-		memcpy(buf + pos, &str, sizeof(const char *));
-		pos += sizeof(const char *);
-	}
-
-	if (ctx->cache_mask & MAIL_SORT_SUBJECT) {
-		if (ctx->common_mask & MAIL_SORT_SUBJECT)
-			str = ctx->last_subject;
-		else {
-			str = mail_get_first_header(mail, "subject");
-
-			if (str != NULL) {
-				str = imap_get_base_subject_cased(
-					pool_datastack_create(), str, NULL);
-			}
-		}
-		str = string_table_get(ctx, str);
-
-		memcpy(buf + pos, &str, sizeof(const char *));
-		pos += sizeof(const char *);
-	}
-
-	i_assert(pos == ctx->sort_element_size);
-
-	t_pop();
-}
-
-static struct sort_context *qsort_context;
-
-static struct mail *get_mail(struct sort_context *ctx, const unsigned char *buf)
-{
-	unsigned int id = *((const unsigned int *)buf);
-	uint32_t seq;
-
-	if (!ctx->id_is_uid)
-		seq = id;
-	else {
-		if (mailbox_get_uids(ctx->box, id, id, &seq, &seq) < 0)
-			return NULL;
-		if (seq == 0)
-			return NULL;
-	}
-
-	if (mail_set_seq(ctx->other_mail, seq) < 0)
-		return NULL;
-	return ctx->other_mail;
-
-}
-
-static time_t get_time(enum mail_sort_type type, const unsigned char *buf,
-		       struct sort_context *ctx)
-{
-	time_t t;
-
-	if ((ctx->cache_mask & type) == 0) {
-		struct mail *mail = get_mail(ctx, buf);
-
-		if (mail == NULL)
-			return 0;
-
-		switch (type) {
-		case MAIL_SORT_ARRIVAL:
-			return mail_get_received_date(mail);
-		case MAIL_SORT_DATE:
-			t = mail_get_date(mail, NULL);
-			if (t == (time_t)-1)
-				t = 0;
-			return t;
-		default:
-			i_unreached();
-			return 0;
-		}
-	}
-
-	/* use memcpy() to avoid any alignment problems */
-	memcpy(&t, buf + sizeof(unsigned int), sizeof(t));
-	return t;
-}
-
-static uoff_t get_uofft(enum mail_sort_type type, const unsigned char *buf,
-			struct sort_context *ctx)
-{
-	uoff_t size;
-
-	if ((ctx->cache_mask & type) == 0) {
-		struct mail *mail = get_mail(ctx, buf);
-
-		if (mail == NULL)
-			return 0;
-
-		i_assert(type == MAIL_SORT_SIZE);
-
-		return mail_get_virtual_size(mail);
-	}
-
-	/* use memcpy() to avoid any alignment problems */
-	memcpy(&size, buf + sizeof(unsigned int), sizeof(size));
-	return size;
-}
-
-static const char *get_str(enum mail_sort_type type, const unsigned char *buf,
-			   struct sort_context *ctx)
-{
-	const char *str;
-	enum mail_sort_type type2;
-	pool_t pool;
-	int pos;
-
-	if ((ctx->cache_mask & type) == 0) {
-		struct mail *mail = get_mail(ctx, buf);
-
-		if (mail == NULL)
-			return NULL;
-
-		switch (type) {
-		case MAIL_SORT_SUBJECT:
-			str = mail_get_first_header(mail, "subject");
-			if (str == NULL)
-				return NULL;
-
-			pool = pool_datastack_create();
-			return imap_get_base_subject_cased(pool, str, NULL);
-		case MAIL_SORT_CC:
-			str = get_first_mailbox(mail, "cc");
-			break;
-		case MAIL_SORT_FROM:
-			str = get_first_mailbox(mail, "from");
-			break;
-		case MAIL_SORT_TO:
-			str = get_first_mailbox(mail, "to");
-			break;
-		default:
-			i_unreached();
-		}
-
-		if (str != NULL)
-			str = t_str_ucase(str);
-		return str;
-	}
-
-	/* figure out where it is. pretty ugly. */
-	type2 = (ctx->cache_mask & ~type);
-
-	if (type2 == 0)
-		pos = 0;
-	else if (IS_SORT_TIME(type2))
-		pos = sizeof(time_t);
-	else if (type2 == MAIL_SORT_SIZE)
-		pos = sizeof(uoff_t);
-	else {
-		if (type == MAIL_SORT_SUBJECT)
-			pos = sizeof(const char *);
-		else if (type2 != MAIL_SORT_SUBJECT && type > type2)
-			pos = sizeof(const char *);
-		else
-			pos = 0;
-	}
-
-	/* use memcpy() to avoid any alignment problems */
-	memcpy(&str, buf + pos + sizeof(unsigned int), sizeof(const char *));
-	return str;
-}
-
-static int mail_sort_qsort_func(const void *p1, const void *p2)
-{
-	enum mail_sort_type *sorting;
-	int ret;
-	bool reverse = FALSE;
-
-	sorting = qsort_context->sort_program;
-
-	t_push();
-
-	ret = 0;
-	for (; *sorting != MAIL_SORT_END && ret == 0; sorting++) {
-		if (*sorting == MAIL_SORT_REVERSE) {
-			reverse = !reverse;
-			continue;
-		}
-
-		switch (*sorting) {
-		case MAIL_SORT_ARRIVAL:
-		case MAIL_SORT_DATE: {
-			time_t r1, r2;
-
-			r1 = get_time(*sorting, p1, qsort_context);
-			r2 = get_time(*sorting, p2, qsort_context);
-			ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0;
-			break;
-		}
-		case MAIL_SORT_SIZE: {
-			uoff_t r1, r2;
-
-			r1 = get_uofft(*sorting, p1, qsort_context);
-			r2 = get_uofft(*sorting, p2, qsort_context);
-			ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0;
-			break;
-		}
-		case MAIL_SORT_CC:
-		case MAIL_SORT_FROM:
-		case MAIL_SORT_TO:
-		case MAIL_SORT_SUBJECT:
-			ret = null_strcmp(get_str(*sorting, p1, qsort_context),
-					  get_str(*sorting, p2, qsort_context));
-			break;
-		default:
-			i_unreached();
-		}
-
-		if (reverse) {
-			if (ret > 0)
-				ret = -1;
-			else if (ret < 0)
-				ret = 1;
-		}
-
-		reverse = FALSE;
-	}
-
-	t_pop();
-
-	return ret != 0 ? ret :
-		(*((const unsigned int *)p1) <
-		 *((const unsigned int *)p2) ? -1 : 1);
-}
-
-static void mail_sort_flush(struct sort_context *ctx)
-{
-	unsigned char *arr;
-	size_t i, count;
-
-	qsort_context = ctx;
-
-	arr = buffer_get_modifyable_data(ctx->sort_buffer, NULL);
-	count = buffer_get_used_size(ctx->sort_buffer) / ctx->sort_element_size;
-	if (count == 0)
-		return;
-
-	qsort(arr, count, ctx->sort_element_size, mail_sort_qsort_func);
-
-	for (i = 0; i < count; i++, arr += ctx->sort_element_size) {
-		if (str_len(ctx->str) >= STRBUF_SIZE-MAX_INT_STRLEN) {
-			/* flush */
-			o_stream_send(ctx->output,
-				      str_data(ctx->str), str_len(ctx->str));
-			str_truncate(ctx->str, 0);
-			ctx->written = TRUE;
-		}
-
-		str_printfa(ctx->str, " %u", *((unsigned int *) arr));
-	}
-
-	buffer_set_used_size(ctx->sort_buffer, 0);
-
-	if (ctx->string_table != NULL) {
-		hash_clear(ctx->string_table, TRUE);
-		p_clear(ctx->str_pool);
-	}
-}
--- a/src/lib-storage/index/Makefile.am	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/Makefile.am	Thu Jun 08 15:49:31 2006 +0300
@@ -15,6 +15,7 @@
 	index-mail-headers.c \
 	index-mailbox-check.c \
 	index-search.c \
+	index-sort.c \
 	index-status.c \
 	index-storage.c \
 	index-sync.c \
@@ -22,4 +23,5 @@
 
 noinst_HEADERS = \
 	index-mail.h \
+	index-sort.h \
 	index-storage.h
--- a/src/lib-storage/index/dbox/dbox-storage.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/dbox/dbox-storage.c	Thu Jun 08 15:49:31 2006 +0300
@@ -708,7 +708,6 @@
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_deinit,
-		index_storage_search_get_sorting,
 		index_storage_search_init,
 		index_storage_search_deinit,
 		index_storage_search_next,
--- a/src/lib-storage/index/index-mail.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/index-mail.c	Thu Jun 08 15:49:31 2006 +0300
@@ -873,6 +873,9 @@
         struct mail_cache_view *cache_view = mail->trans->cache_view;
 	const struct mail_index_record *rec;
 
+	if (data->seq == seq)
+		return 0;
+
 	if (mail_index_lookup(mail->trans->trans_view, seq, &rec) < 0) {
 		mail_storage_set_index_error(mail->ibox);
 		return -1;
--- a/src/lib-storage/index/index-search.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/index-search.c	Thu Jun 08 15:49:31 2006 +0300
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002 Timo Sirainen */
+/* Copyright (C) 2002-2006 Timo Sirainen */
 
 #include "lib.h"
 #include "istream.h"
@@ -11,6 +11,7 @@
 #include "imap-date.h"
 #include "index-storage.h"
 #include "index-mail.h"
+#include "index-sort.h"
 #include "mail-search.h"
 
 #include <stdlib.h>
@@ -25,6 +26,7 @@
 	struct index_mailbox *ibox;
 	char *charset;
 	struct mail_search_arg *args;
+	struct mail_search_sort_program *sort_program;
 
 	uint32_t seq1, seq2;
 	struct mail *mail;
@@ -34,6 +36,7 @@
 	const char *error;
 
 	unsigned int failed:1;
+	unsigned int sorted:1;
 	unsigned int have_seqsets:1;
 };
 
@@ -764,14 +767,6 @@
 	return 0;
 }
 
-int index_storage_search_get_sorting(struct mailbox *box __attr_unused__,
-				     enum mail_sort_type *sort_program)
-{
-	/* currently we don't support sorting */
-	*sort_program = MAIL_SORT_END;
-	return 0;
-}
-
 struct mail_search_context *
 index_storage_search_init(struct mailbox_transaction_context *_t,
 			  const char *charset, struct mail_search_arg *args,
@@ -781,17 +776,13 @@
 		(struct index_transaction_context *)_t;
 	struct index_search_context *ctx;
 
-	if (sort_program != NULL && *sort_program != MAIL_SORT_END) {
-		i_fatal("BUG: index_storage_search_init(): "
-			 "invalid sort_program");
-	}
-
 	ctx = i_new(struct index_search_context, 1);
 	ctx->mail_ctx.transaction = _t;
 	ctx->ibox = t->ibox;
 	ctx->view = t->trans_view;
 	ctx->charset = i_strdup(charset);
 	ctx->args = args;
+	ctx->sort_program = index_sort_program_init(_t, sort_program);
 
 	mail_search_args_reset(ctx->args, TRUE);
 
@@ -823,6 +814,8 @@
 	if (ctx->hdr_pool != NULL)
 		pool_unref(ctx->hdr_pool);
 
+	if (ctx->sort_program != NULL)
+		index_sort_program_deinit(&ctx->sort_program);
 	i_free(ctx->charset);
 	i_free(ctx);
 	return ret;
@@ -868,6 +861,9 @@
 	struct mailbox *box = _ctx->transaction->box;
 	int ret;
 
+	if (ctx->sorted)
+		return index_sort_list_next(ctx->sort_program, mail);
+
 	ctx->mail = mail;
 	ctx->imail = (struct index_mail *)mail;
 
@@ -885,14 +881,28 @@
 
 		if (ctx->error != NULL)
 			ret = -1;
-		if (ret != 0)
-			break;
+		if (ret != 0) {
+			if (ctx->sort_program == NULL)
+				break;
+
+			if (index_sort_list_add(ctx->sort_program, mail) < 0) {
+				ret = -1;
+				break;
+			}
+		}
 	}
 	if (ret < 0)
 		ctx->failed = TRUE;
 	ctx->mail = NULL;
 	ctx->imail = NULL;
 
+	if (ctx->sort_program != NULL && ret == 0) {
+		ctx->sorted = TRUE;
+		if (index_sort_list_finish(ctx->sort_program) < 0)
+			return -1;
+		return index_sort_list_next(ctx->sort_program, mail);
+	}
+
 	return ret;
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/index/index-sort.c	Thu Jun 08 15:49:31 2006 +0300
@@ -0,0 +1,762 @@
+/* Copyright (C) 2006 Timo Sirainen */
+
+/* The idea in here is that for each used primary sort condition there's
+   a 32bit integer in the index file which specifies the sort order. So when
+   sorting we simply look up the sort IDs and sort the messages by them.
+
+   Sort IDs are allocated in two ways:
+
+   1) Time and size fields can be directly used as sort IDs, so we simply
+   use them directly as the missing sort IDs.
+
+   2) Strings can't be used as sort IDs directly. The way they're currently
+   handled is that the whole 32bit integer space is used for them and whenever
+   adding a string, the available space is halved and the new ID is added in
+   the middle. For example if we add one mail the first time, it gets ID
+   2^31. If we then add two mails which are sorted before the first one, they
+   get IDs 2^31/3 and 2^31/3*2. Once we run out of the available space between
+   IDs, a large amount of the IDs are renumbered.
+
+   We try to avoid looking at mails' contents as much as possible. For case 1)
+   IDs it's simple because we need to get only the new mails' sort fields once
+   and use them as sort IDs. For case 2) we'll have to start looking at the
+   strings from older mails as well. To minimize this, we first sort the
+   existing sort IDs. After that we start inserting the new mails into the
+   sorted array by looking the position using binary search. This minimizes
+   the number of lookups we have to do for the old mails. Usually only a few
+   mails have been added, so this should be faster than other sort methods.
+*/
+
+#include "lib.h"
+#include "array.h"
+#include "bsearch-insert-pos.h"
+#include "str.h"
+#include "message-address.h"
+#include "imap-base-subject.h"
+#include "index-storage.h"
+#include "index-sort.h"
+
+#include <stdlib.h>
+
+#define RENUMBER_SPACE 100
+
+struct mail_sort_node {
+	uint32_t seq;
+	uint32_t sort_id;
+};
+
+struct mail_search_sort_program {
+	struct mailbox_transaction_context *t;
+	enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE];
+	const char *primary_sort_header;
+	struct mail *temp_mail;
+
+	array_t ARRAY_DEFINE(nodes, struct mail_sort_node);
+	const struct mail_sort_node *nodes_ptr;
+	unsigned int nodes_count, iter_idx;
+
+	array_t ARRAY_DEFINE(all_nodes, struct mail_sort_node);
+
+	uint32_t ext_id;
+	uint32_t prev_seq, last_sorted_seq;
+
+	unsigned int reverse:1;
+	unsigned int skipped_mails:1;
+	unsigned int sort_ids_added:1;
+};
+
+struct sort_cmp_context {
+	struct mail_search_sort_program *program;
+	struct mail *mail;
+
+	uint32_t cache_seq;
+	enum mail_sort_type cache_type;
+	time_t cache_time;
+	uoff_t cache_size;
+	const char *cache_str;
+};
+
+static struct sort_cmp_context static_node_cmp_context;
+
+struct mail_search_sort_program *
+index_sort_program_init(struct mailbox_transaction_context *t,
+			const enum mail_sort_type *sort_program)
+{
+	struct index_mailbox *ibox = (struct index_mailbox *)t->box;
+	struct mail_search_sort_program *program;
+	const char *name;
+	unsigned int i;
+
+	if (sort_program == NULL || sort_program[0] == MAIL_SORT_END)
+		return NULL;
+
+	/* we support internal sorting by the primary condition */
+	program = i_new(struct mail_search_sort_program, 1);
+	program->t = t;
+	program->temp_mail = mail_alloc(t, 0, NULL);
+	program->reverse =
+		(program->sort_program[0] & MAIL_SORT_FLAG_REVERSE) != 0;
+	ARRAY_CREATE(&program->nodes, default_pool, struct mail_sort_node, 64);
+
+	for (i = 0; i < MAX_SORT_PROGRAM_SIZE; i++) {
+		program->sort_program[i] = sort_program[i];
+		if (sort_program[i] == MAIL_SORT_END)
+			break;
+	}
+	if (i == MAX_SORT_PROGRAM_SIZE)
+		i_panic("index_sort_program_init(): Invalid sort program");
+
+	switch (program->sort_program[0] & MAIL_SORT_MASK) {
+	case MAIL_SORT_ARRIVAL:
+		name = "rdate";
+		break;
+	case MAIL_SORT_CC:
+		name = "sort-c";
+		program->primary_sort_header = "Cc";
+		break;
+	case MAIL_SORT_DATE:
+		name = "date";
+		break;
+	case MAIL_SORT_FROM:
+		name = "sort-f";
+		program->primary_sort_header = "From";
+		break;
+	case MAIL_SORT_SIZE:
+		name = "size";
+		break;
+	case MAIL_SORT_SUBJECT:
+		name = "sort-s";
+		program->primary_sort_header = "Subject";
+		break;
+	case MAIL_SORT_TO:
+		name = "sort-t";
+		program->primary_sort_header = "To";
+		break;
+	default:
+		i_unreached();
+	}
+	program->ext_id =
+		mail_index_ext_register(ibox->index, name, 0,
+					sizeof(uint32_t), sizeof(uint32_t));
+	return program;
+}
+
+void index_sort_program_deinit(struct mail_search_sort_program **_program)
+{
+	struct mail_search_sort_program *program = *_program;
+
+	*_program = NULL;
+	mail_free(&program->temp_mail);
+	array_free(&program->nodes);
+	i_free(program);
+}
+
+static const char *get_first_mailbox(struct mail *mail, const char *header)
+{
+	struct message_address *addr;
+	const char *str;
+
+	str = mail_get_first_header(mail, header);
+	if (str == NULL)
+		return "";
+
+	addr = message_address_parse(pool_datastack_create(),
+				     (const unsigned char *)str,
+				     strlen(str), 1);
+	return addr != NULL ? addr->mailbox : "";
+}
+
+static const char *
+sort_header_get(enum mail_sort_type sort_type, struct mail *mail, uint32_t seq)
+{
+	const char *str;
+
+	mail_set_seq(mail, seq);
+	switch (sort_type & MAIL_SORT_MASK) {
+	case MAIL_SORT_SUBJECT:
+		str = mail_get_first_header(mail, "Subject");
+		return str == NULL ? "" :
+			imap_get_base_subject_cased(pool_datastack_create(),
+						    str, NULL);
+	case MAIL_SORT_CC:
+		return get_first_mailbox(mail, "Cc");
+	case MAIL_SORT_FROM:
+		return get_first_mailbox(mail, "From");
+	case MAIL_SORT_TO:
+		return get_first_mailbox(mail, "To");
+	default:
+		i_unreached();
+	}
+}
+
+static int sort_node_cmp_type(struct sort_cmp_context *ctx,
+			      const enum mail_sort_type *sort_program,
+			      const struct mail_sort_node *n1,
+			      const struct mail_sort_node *n2)
+{
+	enum mail_sort_type sort_type;
+	const char *str1, *str2;
+	time_t time1, time2;
+	uoff_t size1, size2;
+	int ret;
+
+	sort_type = *sort_program & MAIL_SORT_MASK;
+	switch (sort_type) {
+	case MAIL_SORT_CC:
+	case MAIL_SORT_FROM:
+	case MAIL_SORT_TO:
+	case MAIL_SORT_SUBJECT:
+		t_push();
+		str1 = n1->seq == ctx->cache_seq &&
+			ctx->cache_type == sort_type ? ctx->cache_str :
+			sort_header_get(sort_type, ctx->mail, n1->seq);
+		str2 = sort_header_get(sort_type, ctx->mail, n2->seq);
+
+		ret = strcasecmp(str1, str2);
+		t_pop();
+		break;
+	case MAIL_SORT_ARRIVAL:
+		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
+			time1 = ctx->cache_time;
+		else {
+			mail_set_seq(ctx->mail, n1->seq);
+			time1 = mail_get_received_date(ctx->mail);
+			if (time1 == (time_t)-1) time1 = 0;
+		}
+
+		mail_set_seq(ctx->mail, n2->seq);
+		time2 = mail_get_received_date(ctx->mail);
+		if (time2 == (time_t)-1) time2 = 0;
+
+		ret = time1 < time2 ? -1 :
+			(time1 > time2 ? 1 : 0);
+		break;
+	case MAIL_SORT_DATE:
+		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
+			time1 = ctx->cache_time;
+		else {
+			mail_set_seq(ctx->mail, n1->seq);
+			time1 = mail_get_date(ctx->mail, NULL);
+			if (time1 == (time_t)-1) time1 = 0;
+		}
+
+		mail_set_seq(ctx->mail, n2->seq);
+		time2 = mail_get_date(ctx->mail, NULL);
+		if (time2 == (time_t)-1) time2 = 0;
+
+		ret = time1 < time2 ? -1 :
+			(time1 > time2 ? 1 : 0);
+		break;
+	case MAIL_SORT_SIZE:
+		if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type)
+			size1 = ctx->cache_size;
+		else {
+			mail_set_seq(ctx->mail, n1->seq);
+			size1 = mail_get_virtual_size(ctx->mail);
+			if (size1 == (uoff_t)-1) size1 = 0;
+		}
+
+		mail_set_seq(ctx->mail, n2->seq);
+		size2 = mail_get_virtual_size(ctx->mail);
+		if (size2 == (uoff_t)-1) size2 = 0;
+
+		ret = size1 < size2 ? -1 :
+			(size1 > size2 ? 1 : 0);
+		break;
+	case MAIL_SORT_END:
+		return n1->seq < n2->seq ? -1 :
+			(n1->seq > n2->seq ? 1 : 0);
+	case MAIL_SORT_MASK:
+	case MAIL_SORT_FLAG_REVERSE:
+		i_unreached();
+	}
+
+	if (ret == 0)
+		return sort_node_cmp_type(ctx, sort_program+1, n1, n2);
+
+	if ((*sort_program & MAIL_SORT_FLAG_REVERSE) != 0)
+		ret = ret < 0 ? 1 : -1;
+	return ret;
+}
+
+static int sort_node_cmp(const void *p1, const void *p2)
+{
+	struct sort_cmp_context *ctx = &static_node_cmp_context;
+	const struct mail_sort_node *n1 = p1, *n2 = p2;
+
+	if (n1->sort_id < n2->sort_id)
+		return -1;
+	if (n1->sort_id > n2->sort_id)
+		return 1;
+
+	return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2);
+}
+
+static int sort_node_cmp_no_sort_id(const void *p1, const void *p2)
+{
+	struct sort_cmp_context *ctx = &static_node_cmp_context;
+
+	return sort_node_cmp_type(ctx, ctx->program->sort_program, p1, p2);
+}
+
+static void
+index_sort_save_ids(struct mail_search_sort_program *program,
+		    uint32_t first_seq)
+{
+	struct index_transaction_context *t =
+		(struct index_transaction_context *)program->t;
+	const struct mail_sort_node *nodes;
+	unsigned int i, count;
+
+	nodes = array_get(&program->all_nodes, &count);
+	for (i = 0; i < count; i++) {
+		if (nodes[i].seq < first_seq)
+			continue;
+
+		mail_index_update_ext(t->trans, nodes[i].seq,
+				      program->ext_id, &nodes[i].sort_id, NULL);
+	}
+}
+
+static int
+index_sort_add_ids_range(struct mail_search_sort_program *program,
+			 struct mail *mail, unsigned int idx1,
+			 unsigned int idx2)
+{
+	struct mail_sort_node *nodes;
+	unsigned int i, count;
+	const char *last_str = "";
+	uint32_t prev_id = 0, last_id = (uint32_t)-1;
+	string_t *prev_str;
+	const char *str;
+	unsigned int skip;
+	int ret = 1;
+
+	t_push();
+	nodes = array_get_modifyable(&program->all_nodes, &count);
+	if (nodes[idx2].sort_id != 0) {
+		i_assert(idx1 != idx2);
+		last_id = nodes[idx2].sort_id;
+
+		last_str = sort_header_get(program->sort_program[0], mail,
+					   nodes[idx2].seq);
+		idx2--;
+	}
+
+	prev_str = t_str_new(256);
+	if (nodes[idx1].sort_id != 0) {
+		prev_id = nodes[idx1].sort_id;
+
+		str_append(prev_str,
+			   sort_header_get(program->sort_program[0], mail,
+					   nodes[idx1].seq));
+		idx1++;
+	}
+
+	for (i = idx1; i <= idx2; i++) {
+		str = sort_header_get(program->sort_program[0], mail,
+				      nodes[i].seq);
+
+		if (i == idx2 && strcasecmp(str, last_str) == 0)
+			nodes[i].sort_id = last_id;
+		else if (strcasecmp(str, str_c(prev_str)) == 0 && prev_id != 0)
+			nodes[i].sort_id = prev_id;
+		else {
+			/* divide the available space so that each message gets
+			   an equal sized share. leave the same sized space
+			   also between the first and the last messages */
+			skip = (last_id - prev_id) / (idx2 - i + 2);
+			nodes[i].sort_id = prev_id + skip;
+			if (nodes[i].sort_id == prev_id)
+				nodes[i].sort_id++;
+			if (nodes[i].sort_id == last_id) {
+				/* we ran out of ID space. have to renumber
+				   the IDs. */
+				ret = 0;
+				break;
+			}
+
+			prev_id = nodes[i].sort_id;
+			str_truncate(prev_str, 0);
+			str_append(prev_str, str);
+		}
+	}
+	t_pop();
+	return ret;
+}
+
+static void
+index_sort_renumber_ids(struct mail_search_sort_program *program,
+			unsigned int idx)
+{
+	struct index_transaction_context *t =
+		(struct index_transaction_context *)program->t;
+	struct mail_sort_node *nodes;
+	unsigned int i, count;
+	uint32_t sort_id, prev_sort_id, skip;
+
+	nodes = array_get_modifyable(&program->all_nodes, &count);
+	prev_sort_id = (uint32_t)-1;
+	sort_id = nodes[idx].sort_id;
+	i_assert(sort_id == nodes[idx + 1].sort_id);
+
+	if (((uint32_t)-1 - sort_id) / (count - idx + 1) < RENUMBER_SPACE) {
+		/* space is running out, lets just renumber everything */
+		sort_id = 0;
+		skip = (uint32_t)-1 / (count + 1);
+		for (i = 0; i < idx; i++) {
+			if (sort_id != prev_sort_id)
+				sort_id += skip;
+			prev_sort_id = nodes[i].sort_id;
+
+			nodes[i].sort_id = sort_id;
+			mail_index_update_ext(t->trans, nodes[i].seq,
+					      program->ext_id,
+					      &nodes[i].sort_id, NULL);
+		}
+	} else {
+		skip = RENUMBER_SPACE;
+	}
+
+	for (i = idx; i < count && sort_id >= nodes[i].sort_id; i++) {
+		if (sort_id != prev_sort_id) {
+			i_assert(sort_id <= (uint32_t)-1 - skip);
+			sort_id += skip;
+		}
+		prev_sort_id = nodes[i].sort_id;
+
+		if (nodes[i].sort_id != 0) {
+			nodes[i].sort_id = sort_id;
+			mail_index_update_ext(t->trans, nodes[i].seq,
+					      program->ext_id,
+					      &nodes[i].sort_id, NULL);
+		}
+	}
+}
+
+static void
+index_sort_add_ids(struct mail_search_sort_program *program, struct mail *mail)
+{
+	const struct mail_sort_node *nodes;
+	unsigned int i, j, count;
+
+	nodes = array_get(&program->all_nodes, &count);
+	for (i = 0; i < count; i++) {
+		if (nodes[i].sort_id == 0) {
+			for (j = i + 1; j < count; j++) {
+				if (nodes[j].sort_id != 0)
+					break;
+			}
+			if (index_sort_add_ids_range(program, mail,
+						     i == 0 ? 0 : i-1,
+						     I_MIN(j, count-1)) == 0)
+				index_sort_renumber_ids(program, i);
+		}
+	}
+}
+
+static uint32_t get_sort_id_arrival(struct mail *mail)
+{
+	time_t time;
+
+	time = mail_get_received_date(mail);
+	return time == (time_t)-1 ? 0 : time;
+}
+
+static uint32_t get_sort_id_date(struct mail *mail)
+{
+	time_t time;
+
+	time = mail_get_date(mail, NULL);
+	return time == (time_t)-1 ? 0 : time;
+}
+
+static uint32_t get_sort_id_size(struct mail *mail)
+{
+	uoff_t size;
+
+	size = mail_get_virtual_size(mail);
+	if (size == (uoff_t)-1)
+		return 0;
+
+	/* FIXME: elsewhere we support 64bit message sizes, but here
+	   we support only 32bit sizes.. It's a bit too much trouble
+	   to support 64bit here currently, so until such messages
+	   actually start showing up somewhere, 32bit is enough */
+	i_assert(size <= (uint32_t)-1);
+	return size == size;
+}
+
+static void index_sort_preset_sort_ids(struct mail_search_sort_program *program,
+				       uint32_t last_seq)
+{
+	struct mail_sort_node node;
+	struct mail *mail;
+	uint32_t (*get_sort_id)(struct mail *);
+
+	switch (program->sort_program[0] & MAIL_SORT_MASK) {
+	case MAIL_SORT_ARRIVAL:
+		get_sort_id = get_sort_id_arrival;
+		break;
+	case MAIL_SORT_DATE:
+		get_sort_id = get_sort_id_date;
+		break;
+	case MAIL_SORT_SIZE:
+		get_sort_id = get_sort_id_size;
+		break;
+	default:
+		i_unreached();
+	}
+
+	/* add the missing nodes with their sort_ids */
+	mail = program->temp_mail;
+	node.seq = array_count(&program->all_nodes) + 1;
+	for (; node.seq <= last_seq; node.seq++) {
+		mail_set_seq(mail, node.seq);
+		node.sort_id = get_sort_id(mail);
+		array_append(&program->all_nodes, &node, 1);
+	}
+
+	/* @UNSAFE: and sort them */
+	memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
+	static_node_cmp_context.program = program;
+	static_node_cmp_context.mail = mail;
+
+	qsort(array_idx_modifyable(&program->all_nodes, 0), last_seq,
+	      sizeof(struct mail_sort_node), sort_node_cmp);
+}
+
+static void index_sort_cache_seq(struct sort_cmp_context *ctx,
+				 enum mail_sort_type sort_type, uint32_t seq)
+{
+	ctx->cache_seq = seq;
+	ctx->cache_type = sort_type & MAIL_SORT_MASK;
+
+	mail_set_seq(ctx->mail, seq);
+	switch (ctx->cache_type) {
+	case MAIL_SORT_ARRIVAL:
+		ctx->cache_time = get_sort_id_arrival(ctx->mail);
+		break;
+	case MAIL_SORT_DATE:
+		ctx->cache_time = get_sort_id_date(ctx->mail);
+		break;
+	case MAIL_SORT_SIZE:
+		ctx->cache_size = get_sort_id_size(ctx->mail);
+		break;
+	default:
+		ctx->cache_str = sort_header_get(sort_type, ctx->mail, seq);
+		break;
+	}
+}
+
+static void index_sort_headers(struct mail_search_sort_program *program,
+			       uint32_t last_seq)
+{
+	struct mail_sort_node *nodes, node;
+	const struct mail_sort_node *cnodes, *pos;
+	unsigned int count;
+
+	/* we wish to avoid reading the actual headers as much as possible.
+	   first sort the nodes which already have sort_ids, then start
+	   inserting the new nodes by finding their insertion position with
+	   binary search */
+	memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
+	static_node_cmp_context.program = program;
+	static_node_cmp_context.mail = program->temp_mail;
+
+	/* @UNSAFE */
+	nodes = array_get_modifyable(&program->all_nodes, &count);
+	if (program->last_sorted_seq != count) {
+		qsort(nodes, count, sizeof(struct mail_sort_node),
+		      sort_node_cmp);
+	}
+
+	node.sort_id = 0;
+	for (node.seq = count + 1; node.seq <= last_seq; node.seq++) {
+		index_sort_cache_seq(&static_node_cmp_context,
+				     program->sort_program[0], node.seq);
+
+		cnodes = array_get_modifyable(&program->nodes, &count);
+		pos = bsearch_insert_pos(&node, cnodes, count, sizeof(*cnodes),
+					 sort_node_cmp_no_sort_id);
+		array_insert(&program->nodes, pos - cnodes, &node, 1);
+	}
+
+	index_sort_add_ids(program, static_node_cmp_context.mail);
+}
+
+static int index_sort_build(struct mail_search_sort_program *program,
+			    uint32_t last_seq)
+{
+	struct index_mailbox *ibox = (struct index_mailbox *)program->t->box;
+	struct mail_sort_node node;
+	const void *data;
+	unsigned int i, first_missing_sort_id_seq;
+
+	i = array_count(&program->all_nodes);
+	if (i == 0) {
+		/* we're building the array from scratch. add here only the
+		   messages that have sort_ids set. */
+		program->last_sorted_seq = 0;
+		for (; i < last_seq; i++) {
+			node.seq = i+1;
+
+			if (mail_index_lookup_ext(ibox->view, i+1,
+						  program->ext_id, &data) < 0)
+				return -1;
+
+			node.sort_id = data == NULL ? 0 :
+				*(const uint32_t *)data;
+			if (node.sort_id == 0) {
+				/* the rest don't have sort_ids either */
+				break;
+			}
+			array_append(&program->all_nodes, &node, 1);
+		}
+	}
+	first_missing_sort_id_seq = i + 1;
+
+	switch (program->sort_program[0] & MAIL_SORT_MASK) {
+	case MAIL_SORT_ARRIVAL:
+	case MAIL_SORT_DATE:
+	case MAIL_SORT_SIZE:
+		index_sort_preset_sort_ids(program, last_seq);
+		break;
+	default:
+		index_sort_headers(program, last_seq);
+		break;
+	}
+	index_sort_save_ids(program, first_missing_sort_id_seq);
+	return 0;
+}
+
+static void index_sort_add_node(struct mail_search_sort_program *program,
+				const struct mail_sort_node *node)
+{
+	const struct mail_sort_node *nodes, *pos;
+	unsigned int count;
+
+	memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context));
+	static_node_cmp_context.program = program;
+	static_node_cmp_context.mail = program->temp_mail;
+
+	nodes = array_get(&program->nodes, &count);
+	pos = bsearch_insert_pos(node, nodes, count,
+				 sizeof(*node), sort_node_cmp);
+	array_insert(&program->nodes, pos - nodes, node, 1);
+
+	program->last_sorted_seq = node->seq;
+	program->prev_seq = node->seq;
+}
+
+int index_sort_list_add(struct mail_search_sort_program *program,
+			struct mail *mail)
+{
+	struct index_transaction_context *t =
+		(struct index_transaction_context *)program->t;
+	const struct mail_index_header *hdr;
+	const void *data;
+	struct mail_sort_node node;
+	int ret;
+
+	i_assert(mail->transaction == program->t);
+
+	if (program->prev_seq + 1 != mail->seq)
+		program->skipped_mails = TRUE;
+
+	node.seq = mail->seq;
+	if (program->last_sorted_seq == program->prev_seq) {
+		/* we're still on the fast path using sort_ids from the
+		   index file */
+		if (mail_index_lookup_ext(t->trans_view, mail->seq,
+					  program->ext_id, &data) < 0)
+			return -1;
+
+		node.sort_id = data == NULL ? 0 : *(const uint32_t *)data;
+		if (node.sort_id != 0) {
+			index_sort_add_node(program, &node);
+			return 0;
+		}
+		i_assert(!program->sort_ids_added);
+	} else {
+		node.sort_id = 0;
+	}
+
+	/* sort_ids are missing, have to generate them */
+	if (!program->skipped_mails) {
+		/* as long as we think we're returning all the mails sorted,
+		   which is the common case, we want to avoid duplicating the
+		   node array. so here we just keep counting the sequences
+		   until either we skip a sequence or we reach list_finish() */
+		program->prev_seq = mail->seq;
+		return 0;
+	}
+
+	/* we're not returning all the mails. have to create a temporary array
+	   for all the nodes so we can set all the missing sort_ids. */
+	hdr = mail_index_get_header(t->ibox->view);
+	ARRAY_CREATE(&program->all_nodes, default_pool,
+		     struct mail_sort_node, hdr->messages_count);
+	ret = index_sort_build(program, hdr->messages_count);
+	array_free(&program->all_nodes);
+	if (ret < 0)
+		return -1;
+
+	/* add the nodes in the middle */
+	node.seq = program->last_sorted_seq + 1;
+	for (; node.seq <= program->prev_seq; node.seq++) {
+		if (mail_index_lookup_ext(t->trans_view, mail->seq,
+					  program->ext_id, &data) < 0)
+			return -1;
+
+		node.sort_id = *(const uint32_t *)data;
+		i_assert(node.sort_id != 0);
+
+		index_sort_add_node(program, &node);
+	}
+
+	/* and add this last node */
+	program->sort_ids_added = TRUE;
+	return index_sort_list_add(program, mail);
+}
+
+int index_sort_list_finish(struct mail_search_sort_program *program)
+{
+	if (program->last_sorted_seq != program->prev_seq) {
+		/* nodes array contains a contiguous range of sequences from
+		   the beginning, with the last ones missing sort_id. we can
+		   just sort the array directly without copying it. */
+		i_assert(!program->sort_ids_added);
+
+		program->all_nodes = program->nodes;
+		if (index_sort_build(program, program->prev_seq) < 0)
+			return -1;
+	}
+
+	program->nodes_ptr =
+		array_get(&program->nodes, &program->nodes_count);
+
+	if (program->reverse)
+		program->iter_idx = program->nodes_count;
+	return 0;
+}
+
+int index_sort_list_next(struct mail_search_sort_program *program,
+			 struct mail *mail)
+{
+	const struct mail_sort_node *node;
+
+	if (!program->reverse) {
+		if (program->iter_idx == program->nodes_count)
+			return 0;
+
+		node = &program->nodes_ptr[program->iter_idx++];
+	} else {
+		if (program->iter_idx == 0)
+			return 0;
+
+		node = &program->nodes_ptr[--program->iter_idx];
+	}
+	mail_set_seq(mail, node->seq);
+	return 1;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib-storage/index/index-sort.h	Thu Jun 08 15:49:31 2006 +0300
@@ -0,0 +1,18 @@
+#ifndef __INDEX_SORT_H
+#define __INDEX_SORT_H
+
+struct mail_search_sort_program;
+
+struct mail_search_sort_program *
+index_sort_program_init(struct mailbox_transaction_context *t,
+			const enum mail_sort_type *sort_program);
+void index_sort_program_deinit(struct mail_search_sort_program **program);
+
+int index_sort_list_add(struct mail_search_sort_program *program,
+			struct mail *mail);
+int index_sort_list_finish(struct mail_search_sort_program *program);
+
+int index_sort_list_next(struct mail_search_sort_program *program,
+			 struct mail *mail);
+
+#endif
--- a/src/lib-storage/index/index-storage.h	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/index-storage.h	Thu Jun 08 15:49:31 2006 +0300
@@ -168,8 +168,6 @@
 index_header_lookup_init(struct mailbox *box, const char *const headers[]);
 void index_header_lookup_deinit(struct mailbox_header_lookup_ctx *ctx);
 
-int index_storage_search_get_sorting(struct mailbox *box,
-				     enum mail_sort_type *sort_program);
 struct mail_search_context *
 index_storage_search_init(struct mailbox_transaction_context *t,
 			  const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/index/maildir/maildir-storage.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/maildir/maildir-storage.c	Thu Jun 08 15:49:31 2006 +0300
@@ -966,7 +966,6 @@
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_deinit,
-		index_storage_search_get_sorting,
 		index_storage_search_init,
 		index_storage_search_deinit,
 		index_storage_search_next,
--- a/src/lib-storage/index/mbox/mbox-storage.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/index/mbox/mbox-storage.c	Thu Jun 08 15:49:31 2006 +0300
@@ -1121,7 +1121,6 @@
 		index_mail_alloc,
 		index_header_lookup_init,
 		index_header_lookup_deinit,
-		index_storage_search_get_sorting,
 		index_storage_search_init,
 		index_storage_search_deinit,
 		index_storage_search_next,
--- a/src/lib-storage/mail-storage-private.h	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/mail-storage-private.h	Thu Jun 08 15:49:31 2006 +0300
@@ -131,8 +131,6 @@
 				      const char *const headers[]);
 	void (*header_lookup_deinit)(struct mailbox_header_lookup_ctx *ctx);
 
-	int (*search_get_sorting)(struct mailbox *box,
-				  enum mail_sort_type *sort_program);
 	struct mail_search_context *
 	(*search_init)(struct mailbox_transaction_context *t,
 		       const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/mail-storage.c	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/mail-storage.c	Thu Jun 08 15:49:31 2006 +0300
@@ -444,12 +444,6 @@
 	ctx->box->v.header_lookup_deinit(ctx);
 }
 
-int mailbox_search_get_sorting(struct mailbox *box,
-			       enum mail_sort_type *sort_program)
-{
-	return box->v.search_get_sorting(box, sort_program);
-}
-
 struct mail_search_context *
 mailbox_search_init(struct mailbox_transaction_context *t,
 		    const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/mail-storage.h	Wed Jun 07 12:05:05 2006 +0300
+++ b/src/lib-storage/mail-storage.h	Thu Jun 08 15:49:31 2006 +0300
@@ -86,18 +86,19 @@
 };
 
 enum mail_sort_type {
-/* Maximum size for sort program, 2x for reverse + END */
-#define MAX_SORT_PROGRAM_SIZE (2*7 + 1)
+/* Maximum size for sort program (each one separately + END) */
+#define MAX_SORT_PROGRAM_SIZE (7 + 1)
 
-	MAIL_SORT_ARRIVAL	= 0x0010,
-	MAIL_SORT_CC		= 0x0020,
-	MAIL_SORT_DATE		= 0x0040,
-	MAIL_SORT_FROM		= 0x0080,
-	MAIL_SORT_SIZE		= 0x0100,
-	MAIL_SORT_SUBJECT	= 0x0200,
-	MAIL_SORT_TO		= 0x0400,
+	MAIL_SORT_ARRIVAL	= 0x0001,
+	MAIL_SORT_CC		= 0x0002,
+	MAIL_SORT_DATE		= 0x0004,
+	MAIL_SORT_FROM		= 0x0008,
+	MAIL_SORT_SIZE		= 0x0010,
+	MAIL_SORT_SUBJECT	= 0x0020,
+	MAIL_SORT_TO		= 0x0040,
 
-	MAIL_SORT_REVERSE	= 0x0001, /* reverse the next type */
+	MAIL_SORT_MASK		= 0x0fff,
+	MAIL_SORT_FLAG_REVERSE	= 0x1000, /* reverse this mask type */
 
 	MAIL_SORT_END		= 0x0000 /* ends sort program */
 };
@@ -373,17 +374,9 @@
 mailbox_header_lookup_init(struct mailbox *box, const char *const headers[]);
 void mailbox_header_lookup_deinit(struct mailbox_header_lookup_ctx **ctx);
 
-/* Modify sort_program to specify a sort program acceptable for
-   search_init(). If mailbox supports no sorting, it's simply set to
-   {MAIL_SORT_END}. */
-int mailbox_search_get_sorting(struct mailbox *box,
-			       enum mail_sort_type *sort_program);
-/* Initialize new search request. Search arguments are given so that
-   the storage can optimize the searching as it wants.
-
-   If sort_program is non-NULL, it requests that the returned messages
-   are sorted by the given criteria. sort_program must have gone
-   through search_get_sorting(). */
+/* Initialize new search request. charset specifies the character set used in
+   the search argument strings. If sort_program is non-NULL, the messages are
+   returned in the requested order, otherwise from first to last. */
 struct mail_search_context *
 mailbox_search_init(struct mailbox_transaction_context *t,
 		    const char *charset, struct mail_search_arg *args,