Mercurial > dovecot > original-hg > dovecot-1.2
changeset 4303:5f03738219a6 HEAD
Changed mail-storage API to do the mail sorting internally. Optimized it
internally to keep a 32bit sort_id field in index for each used primary sort
condition. Practically this should mean less disk I/O, memory and CPU usage
when SORT command is used.
author | Timo Sirainen <timo.sirainen@movial.fi> |
---|---|
date | Thu, 08 Jun 2006 15:49:31 +0300 |
parents | a9498883f44c |
children | 3bb4f35b99d4 |
files | src/imap/cmd-sort.c src/imap/imap-sort.c src/lib-storage/index/Makefile.am src/lib-storage/index/dbox/dbox-storage.c src/lib-storage/index/index-mail.c src/lib-storage/index/index-search.c src/lib-storage/index/index-sort.c src/lib-storage/index/index-sort.h src/lib-storage/index/index-storage.h src/lib-storage/index/maildir/maildir-storage.c src/lib-storage/index/mbox/mbox-storage.c src/lib-storage/mail-storage-private.h src/lib-storage/mail-storage.c src/lib-storage/mail-storage.h |
diffstat | 14 files changed, 909 insertions(+), 739 deletions(-) [+] |
line wrap: on
line diff
--- a/src/imap/cmd-sort.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/imap/cmd-sort.c Thu Jun 08 15:49:31 2006 +0300 @@ -20,28 +20,32 @@ { MAIL_SORT_SUBJECT, "subject" }, { MAIL_SORT_TO, "to" }, - { MAIL_SORT_REVERSE, "reverse" }, { MAIL_SORT_END, NULL } }; -static enum mail_sort_type * -get_sort_program(struct client_command_context *cmd, struct imap_arg *args) +static int +get_sort_program(struct client_command_context *cmd, struct imap_arg *args, + enum mail_sort_type program[MAX_SORT_PROGRAM_SIZE]) { - enum mail_sort_type type; - buffer_t *buf; - int i; + enum mail_sort_type mask = 0; + unsigned int i, pos; + bool reverse; if (args->type == IMAP_ARG_EOL) { /* empyty list */ client_send_command_error(cmd, "Empty sort program."); - return NULL; + return -1; } - buf = buffer_create_dynamic(pool_datastack_create(), - 32 * sizeof(enum mail_sort_type)); + pos = 0; reverse = FALSE; + for (; args->type == IMAP_ARG_ATOM || args->type == IMAP_ARG_STRING; + args++) { + const char *arg = IMAP_ARG_STR(args); - while (args->type == IMAP_ARG_ATOM || args->type == IMAP_ARG_STRING) { - const char *arg = IMAP_ARG_STR(args); + if (strcasecmp(arg, "reverse") == 0) { + reverse = !reverse; + continue; + } for (i = 0; sort_names[i].type != MAIL_SORT_END; i++) { if (strcasecmp(arg, sort_names[i].name) == 0) @@ -51,31 +55,37 @@ if (sort_names[i].type == MAIL_SORT_END) { client_send_command_error(cmd, t_strconcat( "Unknown sort argument: ", arg, NULL)); - return NULL; + return -1; } - buffer_append(buf, &sort_names[i].type, - sizeof(enum mail_sort_type)); - args++; + if ((mask & sort_names[i].type) != 0) + continue; + mask |= sort_names[i].type; + + /* @UNSAFE: mask check should prevent us from ever + overflowing */ + i_assert(pos < MAX_SORT_PROGRAM_SIZE-1); + program[pos++] = sort_names[i].type | + (reverse ? MAIL_SORT_FLAG_REVERSE : 0); + reverse = FALSE; } - type = MAIL_SORT_END; - buffer_append(buf, &type, sizeof(type)); + program[pos++] = MAIL_SORT_END; if (args->type != IMAP_ARG_EOL) { client_send_command_error(cmd, "Invalid sort list argument."); - return NULL; + return -1; } - return buffer_free_without_data(buf); + return 0; } bool cmd_sort(struct client_command_context *cmd) { struct client *client = cmd->client; struct mail_search_arg *sargs; - enum mail_sort_type *sorting; + enum mail_sort_type sorting[MAX_SORT_PROGRAM_SIZE]; struct imap_arg *args; int args_count; pool_t pool; @@ -100,8 +110,7 @@ return TRUE; } - sorting = get_sort_program(cmd, IMAP_ARG_LIST(args)->args); - if (sorting == NULL) + if (get_sort_program(cmd, IMAP_ARG_LIST(args)->args, sorting) < 0) return TRUE; args++;
--- a/src/imap/imap-sort.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/imap/imap-sort.c Thu Jun 08 15:49:31 2006 +0300 @@ -6,7 +6,7 @@ reasonably fast. */ #include "common.h" -#include "buffer.h" +#include "array.h" #include "hash.h" #include "ostream.h" #include "str.h" @@ -28,701 +28,87 @@ ((type) == MAIL_SORT_ARRIVAL || (type) == MAIL_SORT_DATE) struct sort_context { - struct mail_search_context *search_ctx; - struct mailbox_transaction_context *t; - struct mail *other_mail; - enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE]; - enum mail_sort_type common_mask, cache_mask; struct mailbox *box; struct ostream *output; string_t *str; - buffer_t *sort_buffer; - size_t sort_element_size; - - pool_t str_pool; - struct hash_table *string_table; - - time_t last_arrival, last_date; - uoff_t last_size; - char *last_cc, *last_from, *last_subject, *last_to; - - bool written, id_is_uid; + bool written; }; -static void mail_sort_input(struct sort_context *ctx, struct mail *mail); -static void mail_sort_flush(struct sort_context *ctx); - -static void -mail_sort_normalize(const enum mail_sort_type *input, buffer_t *output) -{ - enum mail_sort_type type, mask = 0; - bool reverse; - - reverse = FALSE; - for (; *input != MAIL_SORT_END; input++) { - if (*input == MAIL_SORT_REVERSE) - reverse = !reverse; - else { - if ((mask & *input) == 0) { - if (reverse) { - type = MAIL_SORT_REVERSE; - buffer_append(output, - &type, sizeof(type)); - } - - buffer_append(output, input, sizeof(*input)); - mask |= *input; - } - - reverse = FALSE; - } - } - - type = MAIL_SORT_END; - buffer_append(output, &type, sizeof(type)); -} - -static enum mail_sort_type -mail_sort_get_common_mask(const enum mail_sort_type *sort1, - const enum mail_sort_type *sort2, - unsigned int *count) -{ - enum mail_sort_type mask = 0; - - *count = 0; - while (*sort1 == *sort2 && *sort1 != MAIL_SORT_END) { - if (*sort1 != MAIL_SORT_REVERSE) - mask |= *sort1; - sort1++; sort2++; (*count)++; - } - - return mask; -} - -static enum mail_fetch_field -init_sort_elements(struct sort_context *ctx, - const char *wanted_headers[MAX_WANTED_HEADERS]) -{ - unsigned int i; - enum mail_fetch_field fields; - - /* figure out what data we'd like to cache */ - ctx->sort_element_size = sizeof(unsigned int); - ctx->cache_mask = 0; - - for (i = 0; ctx->sort_program[i] != MAIL_SORT_END; i++) { - enum mail_sort_type type = ctx->sort_program[i]; - - if (IS_SORT_STRING(type)) { - ctx->sort_element_size += sizeof(const char *); - - /* cache the second rule as well, if available */ - if (ctx->cache_mask != 0) { - ctx->cache_mask |= type; - break; - } - ctx->cache_mask |= type; - } else if (IS_SORT_TIME(type)) { - ctx->sort_element_size += sizeof(time_t); - ctx->cache_mask |= type; - break; - } else if (type == MAIL_SORT_SIZE) { - ctx->sort_element_size += sizeof(uoff_t); - ctx->cache_mask |= type; - break; - } - } - - fields = 0; - if (ctx->cache_mask & MAIL_SORT_ARRIVAL) - fields |= MAIL_FETCH_RECEIVED_DATE; - if (ctx->cache_mask & MAIL_SORT_DATE) - fields |= MAIL_FETCH_DATE; - if (ctx->cache_mask & MAIL_SORT_SIZE) - fields |= MAIL_FETCH_VIRTUAL_SIZE; - - /* @UNSAFE */ - i_assert(MAX_WANTED_HEADERS > 4); - i = 0; - if (ctx->cache_mask & MAIL_SORT_CC) - wanted_headers[i++] = "cc"; - if (ctx->cache_mask & MAIL_SORT_FROM) - wanted_headers[i++] = "from"; - if (ctx->cache_mask & MAIL_SORT_TO) - wanted_headers[i++] = "to"; - if (ctx->cache_mask & MAIL_SORT_SUBJECT) - wanted_headers[i++] = "subject"; - wanted_headers[i] = NULL; - - if ((ctx->cache_mask & MAIL_SORT_CC) || - (ctx->cache_mask & MAIL_SORT_FROM) || - (ctx->cache_mask & MAIL_SORT_TO) || - (ctx->cache_mask & MAIL_SORT_SUBJECT)) { - ctx->str_pool = pool_alloconly_create("sort str", 8192); - ctx->string_table = hash_create(default_pool, ctx->str_pool, - 0, str_hash, - (hash_cmp_callback_t *)strcmp); - } - - return fields; -} - -static void mail_sort_deinit(struct sort_context *ctx) -{ - mail_sort_flush(ctx); - - if (ctx->string_table != NULL) - hash_destroy(ctx->string_table); - if (ctx->str_pool != NULL) - pool_unref(ctx->str_pool); - buffer_free(ctx->sort_buffer); - - i_free(ctx->last_cc); - i_free(ctx->last_from); - i_free(ctx->last_subject); - i_free(ctx->last_to); -} - int imap_sort(struct client_command_context *cmd, const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program) { struct client *client = cmd->client; - enum mail_sort_type norm_prog[MAX_SORT_PROGRAM_SIZE]; - enum mail_fetch_field wanted_fields; - const char *wanted_headers[MAX_WANTED_HEADERS]; + const char *wanted_headers[2]; + enum mail_fetch_field wanted_fields; + struct mail_search_context *search_ctx; + struct mailbox_transaction_context *t; struct mailbox_header_lookup_ctx *headers_ctx; - struct sort_context *ctx; struct mail *mail; - buffer_t *buf; - unsigned int count; + string_t *str; + bool written = FALSE; int ret; - ctx = t_new(struct sort_context, 1); - - /* normalize sorting program. note that although we're using a hard - buffer size here, it shouldn't be possible to overflow it since - the normalized sort program can't exceed MAX_SORT_PROGRAM_SIZE. */ - buf = buffer_create_data(pool_datastack_create(), - norm_prog, sizeof(norm_prog)); - mail_sort_normalize(sort_program, buf); - memcpy(ctx->sort_program, norm_prog, sizeof(ctx->sort_program)); - - /* remove the common part from sort program, we already know input is - sorted that much so we don't have to worry about it. */ - if (mailbox_search_get_sorting(client->mailbox, norm_prog) < 0) - return -1; - ctx->common_mask = mail_sort_get_common_mask(ctx->sort_program, - norm_prog, &count); - if (count > 0) { - memmove(ctx->sort_program, ctx->sort_program + count, - sizeof(ctx->sort_program) - - sizeof(ctx->sort_program[0]) * count); + wanted_fields = 0; + wanted_headers[0] = wanted_headers[1] = NULL; + switch (*sort_program & MAIL_SORT_MASK) { + case MAIL_SORT_ARRIVAL: + wanted_fields = MAIL_FETCH_RECEIVED_DATE; + break; + case MAIL_SORT_CC: + wanted_headers[0] = "Cc"; + break; + case MAIL_SORT_DATE: + wanted_fields = MAIL_FETCH_DATE; + break; + case MAIL_SORT_FROM: + wanted_headers[0] = "From"; + break; + case MAIL_SORT_SIZE: + wanted_fields = MAIL_FETCH_VIRTUAL_SIZE; + break; + case MAIL_SORT_SUBJECT: + wanted_headers[0] = "Subject"; + break; + case MAIL_SORT_TO: + wanted_headers[0] = "To"; + break; } - memset(wanted_headers, 0, sizeof(wanted_headers)); - wanted_fields = init_sort_elements(ctx, wanted_headers); - headers_ctx = *wanted_headers == NULL ? NULL : + headers_ctx = wanted_headers[0] == NULL ? NULL : mailbox_header_lookup_init(client->mailbox, wanted_headers); - /* initialize searching */ - ctx->t = mailbox_transaction_begin(client->mailbox, 0); - ctx->search_ctx = mailbox_search_init(ctx->t, charset, args, norm_prog); - ctx->other_mail = mail_alloc(ctx->t, wanted_fields, headers_ctx); + t = mailbox_transaction_begin(client->mailbox, 0); + search_ctx = mailbox_search_init(t, charset, args, sort_program); - ctx->box = client->mailbox; - ctx->output = client->output; - ctx->sort_buffer = buffer_create_dynamic(system_pool, - 128 * ctx->sort_element_size); - - ctx->str = t_str_new(STRBUF_SIZE); - str_append(ctx->str, "* SORT"); + str = t_str_new(STRBUF_SIZE); + str_append(str, "* SORT"); - ctx->id_is_uid = cmd->uid; - - mail = mail_alloc(ctx->t, wanted_fields, headers_ctx); - while (mailbox_search_next(ctx->search_ctx, mail) > 0) - mail_sort_input(ctx, mail); + mail = mail_alloc(t, wanted_fields, headers_ctx); + while (mailbox_search_next(search_ctx, mail) > 0) { + if (str_len(str) >= STRBUF_SIZE-MAX_INT_STRLEN) { + o_stream_send(client->output, str_data(str), + str_len(str)); + str_truncate(str, 0); + written = TRUE; + } + str_printfa(str, " %u", cmd->uid ? mail->uid : mail->seq); + } + ret = mailbox_search_deinit(&search_ctx); + mail_free(&mail); - mail_sort_flush(ctx); - ret = mailbox_search_deinit(&ctx->search_ctx); - - mail_free(&mail); - mail_free(&ctx->other_mail); - - if (mailbox_transaction_commit(&ctx->t, 0) < 0) + if (mailbox_transaction_commit(&t, 0) < 0) ret = -1; - if (ctx->written || ret == 0) { - str_append(ctx->str, "\r\n"); - o_stream_send(client->output, str_data(ctx->str), - str_len(ctx->str)); + if (written || ret == 0) { + str_append(str, "\r\n"); + o_stream_send(client->output, str_data(str), str_len(str)); } if (headers_ctx != NULL) mailbox_header_lookup_deinit(&headers_ctx); - mail_sort_deinit(ctx); return ret; } - -static const char *string_table_get(struct sort_context *ctx, const char *str) -{ - char *value; - - if (str == NULL) - return NULL; - if (*str == '\0') - return ""; - - value = hash_lookup(ctx->string_table, str); - if (value == NULL) { - value = p_strdup(ctx->str_pool, str); - hash_insert(ctx->string_table, value, value); - } - - return value; -} - -static const char *get_first_mailbox(struct mail *mail, const char *field) -{ - struct message_address *addr; - const char *str; - - str = mail_get_first_header(mail, field); - if (str == NULL) - return NULL; - - addr = message_address_parse(pool_datastack_create(), - (const unsigned char *)str, - strlen(str), 1); - return addr != NULL ? addr->mailbox : NULL; -} - -static void mail_sort_check_flush(struct sort_context *ctx, struct mail *mail) -{ - const char *str; - time_t t; - uoff_t size; - bool changed = FALSE; - - if (ctx->common_mask & MAIL_SORT_ARRIVAL) { - t = mail_get_received_date(mail); - if (t != ctx->last_arrival) { - ctx->last_arrival = t; - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_CC) { - str = get_first_mailbox(mail, "cc"); - if (str != NULL) - str = t_str_ucase(str); - - if (null_strcmp(str, ctx->last_cc) != 0) { - i_free(ctx->last_cc); - ctx->last_cc = i_strdup(str); - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_DATE) { - t = mail_get_date(mail, NULL); - if (t != ctx->last_date) { - ctx->last_date = t; - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_FROM) { - str = get_first_mailbox(mail, "from"); - if (str != NULL) - str = t_str_ucase(str); - - if (null_strcmp(str, ctx->last_from) != 0) { - i_free(ctx->last_from); - ctx->last_from = i_strdup(str); - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_SIZE) { - size = mail_get_virtual_size(mail); - if (size != ctx->last_size) { - ctx->last_size = size; - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_SUBJECT) { - str = mail_get_first_header(mail, "subject"); - if (str != NULL) { - str = imap_get_base_subject_cased( - pool_datastack_create(), str, NULL); - } - - if (null_strcmp(str, ctx->last_subject) != 0) { - i_free(ctx->last_subject); - ctx->last_subject = i_strdup(str); - changed = TRUE; - } - } - - if (ctx->common_mask & MAIL_SORT_TO) { - str = get_first_mailbox(mail, "to"); - if (str != NULL) - str = t_str_ucase(str); - - if (null_strcmp(str, ctx->last_to) != 0) { - i_free(ctx->last_to); - ctx->last_to = i_strdup(str); - changed = TRUE; - } - } - - if (changed) - mail_sort_flush(ctx); -} - -static void mail_sort_input(struct sort_context *ctx, struct mail *mail) -{ - /* @UNSAFE */ - unsigned char *buf; - unsigned int id; - time_t t; - uoff_t size; - const char *str; - size_t pos; - - t_push(); - if (ctx->common_mask != 0) - mail_sort_check_flush(ctx, mail); - - buf = buffer_append_space_unsafe(ctx->sort_buffer, - ctx->sort_element_size); - id = ctx->id_is_uid ? mail->uid : mail->seq; - memcpy(buf, &id, sizeof(id)); pos = sizeof(id); - - if (ctx->cache_mask & MAIL_SORT_ARRIVAL) { - if (ctx->common_mask & MAIL_SORT_ARRIVAL) - t = ctx->last_arrival; - else - t = mail_get_received_date(mail); - memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t); - } - - if (ctx->cache_mask & MAIL_SORT_DATE) { - if (ctx->common_mask & MAIL_SORT_DATE) - t = ctx->last_date; - else - t = mail_get_date(mail, NULL); - memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t); - } - - if (ctx->cache_mask & MAIL_SORT_SIZE) { - if (ctx->common_mask & MAIL_SORT_SIZE) - size = ctx->last_size; - else - size = mail_get_virtual_size(mail); - - memcpy(buf + pos, &size, sizeof(size)); pos += sizeof(size); - } - - if (ctx->cache_mask & MAIL_SORT_CC) { - if (ctx->common_mask & MAIL_SORT_CC) - str = ctx->last_cc; - else { - str = get_first_mailbox(mail, "cc"); - if (str != NULL) - str = t_str_ucase(str); - } - str = string_table_get(ctx, str); - - memcpy(buf + pos, &str, sizeof(const char *)); - pos += sizeof(const char *); - } - - if (ctx->cache_mask & MAIL_SORT_FROM) { - if (ctx->common_mask & MAIL_SORT_FROM) - str = ctx->last_from; - else { - str = get_first_mailbox(mail, "from"); - if (str != NULL) - str = t_str_ucase(str); - } - str = string_table_get(ctx, str); - - memcpy(buf + pos, &str, sizeof(const char *)); - pos += sizeof(const char *); - } - - if (ctx->cache_mask & MAIL_SORT_TO) { - if (ctx->common_mask & MAIL_SORT_TO) - str = ctx->last_to; - else { - str = get_first_mailbox(mail, "to"); - if (str != NULL) - str = t_str_ucase(str); - } - str = string_table_get(ctx, str); - - memcpy(buf + pos, &str, sizeof(const char *)); - pos += sizeof(const char *); - } - - if (ctx->cache_mask & MAIL_SORT_SUBJECT) { - if (ctx->common_mask & MAIL_SORT_SUBJECT) - str = ctx->last_subject; - else { - str = mail_get_first_header(mail, "subject"); - - if (str != NULL) { - str = imap_get_base_subject_cased( - pool_datastack_create(), str, NULL); - } - } - str = string_table_get(ctx, str); - - memcpy(buf + pos, &str, sizeof(const char *)); - pos += sizeof(const char *); - } - - i_assert(pos == ctx->sort_element_size); - - t_pop(); -} - -static struct sort_context *qsort_context; - -static struct mail *get_mail(struct sort_context *ctx, const unsigned char *buf) -{ - unsigned int id = *((const unsigned int *)buf); - uint32_t seq; - - if (!ctx->id_is_uid) - seq = id; - else { - if (mailbox_get_uids(ctx->box, id, id, &seq, &seq) < 0) - return NULL; - if (seq == 0) - return NULL; - } - - if (mail_set_seq(ctx->other_mail, seq) < 0) - return NULL; - return ctx->other_mail; - -} - -static time_t get_time(enum mail_sort_type type, const unsigned char *buf, - struct sort_context *ctx) -{ - time_t t; - - if ((ctx->cache_mask & type) == 0) { - struct mail *mail = get_mail(ctx, buf); - - if (mail == NULL) - return 0; - - switch (type) { - case MAIL_SORT_ARRIVAL: - return mail_get_received_date(mail); - case MAIL_SORT_DATE: - t = mail_get_date(mail, NULL); - if (t == (time_t)-1) - t = 0; - return t; - default: - i_unreached(); - return 0; - } - } - - /* use memcpy() to avoid any alignment problems */ - memcpy(&t, buf + sizeof(unsigned int), sizeof(t)); - return t; -} - -static uoff_t get_uofft(enum mail_sort_type type, const unsigned char *buf, - struct sort_context *ctx) -{ - uoff_t size; - - if ((ctx->cache_mask & type) == 0) { - struct mail *mail = get_mail(ctx, buf); - - if (mail == NULL) - return 0; - - i_assert(type == MAIL_SORT_SIZE); - - return mail_get_virtual_size(mail); - } - - /* use memcpy() to avoid any alignment problems */ - memcpy(&size, buf + sizeof(unsigned int), sizeof(size)); - return size; -} - -static const char *get_str(enum mail_sort_type type, const unsigned char *buf, - struct sort_context *ctx) -{ - const char *str; - enum mail_sort_type type2; - pool_t pool; - int pos; - - if ((ctx->cache_mask & type) == 0) { - struct mail *mail = get_mail(ctx, buf); - - if (mail == NULL) - return NULL; - - switch (type) { - case MAIL_SORT_SUBJECT: - str = mail_get_first_header(mail, "subject"); - if (str == NULL) - return NULL; - - pool = pool_datastack_create(); - return imap_get_base_subject_cased(pool, str, NULL); - case MAIL_SORT_CC: - str = get_first_mailbox(mail, "cc"); - break; - case MAIL_SORT_FROM: - str = get_first_mailbox(mail, "from"); - break; - case MAIL_SORT_TO: - str = get_first_mailbox(mail, "to"); - break; - default: - i_unreached(); - } - - if (str != NULL) - str = t_str_ucase(str); - return str; - } - - /* figure out where it is. pretty ugly. */ - type2 = (ctx->cache_mask & ~type); - - if (type2 == 0) - pos = 0; - else if (IS_SORT_TIME(type2)) - pos = sizeof(time_t); - else if (type2 == MAIL_SORT_SIZE) - pos = sizeof(uoff_t); - else { - if (type == MAIL_SORT_SUBJECT) - pos = sizeof(const char *); - else if (type2 != MAIL_SORT_SUBJECT && type > type2) - pos = sizeof(const char *); - else - pos = 0; - } - - /* use memcpy() to avoid any alignment problems */ - memcpy(&str, buf + pos + sizeof(unsigned int), sizeof(const char *)); - return str; -} - -static int mail_sort_qsort_func(const void *p1, const void *p2) -{ - enum mail_sort_type *sorting; - int ret; - bool reverse = FALSE; - - sorting = qsort_context->sort_program; - - t_push(); - - ret = 0; - for (; *sorting != MAIL_SORT_END && ret == 0; sorting++) { - if (*sorting == MAIL_SORT_REVERSE) { - reverse = !reverse; - continue; - } - - switch (*sorting) { - case MAIL_SORT_ARRIVAL: - case MAIL_SORT_DATE: { - time_t r1, r2; - - r1 = get_time(*sorting, p1, qsort_context); - r2 = get_time(*sorting, p2, qsort_context); - ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0; - break; - } - case MAIL_SORT_SIZE: { - uoff_t r1, r2; - - r1 = get_uofft(*sorting, p1, qsort_context); - r2 = get_uofft(*sorting, p2, qsort_context); - ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0; - break; - } - case MAIL_SORT_CC: - case MAIL_SORT_FROM: - case MAIL_SORT_TO: - case MAIL_SORT_SUBJECT: - ret = null_strcmp(get_str(*sorting, p1, qsort_context), - get_str(*sorting, p2, qsort_context)); - break; - default: - i_unreached(); - } - - if (reverse) { - if (ret > 0) - ret = -1; - else if (ret < 0) - ret = 1; - } - - reverse = FALSE; - } - - t_pop(); - - return ret != 0 ? ret : - (*((const unsigned int *)p1) < - *((const unsigned int *)p2) ? -1 : 1); -} - -static void mail_sort_flush(struct sort_context *ctx) -{ - unsigned char *arr; - size_t i, count; - - qsort_context = ctx; - - arr = buffer_get_modifyable_data(ctx->sort_buffer, NULL); - count = buffer_get_used_size(ctx->sort_buffer) / ctx->sort_element_size; - if (count == 0) - return; - - qsort(arr, count, ctx->sort_element_size, mail_sort_qsort_func); - - for (i = 0; i < count; i++, arr += ctx->sort_element_size) { - if (str_len(ctx->str) >= STRBUF_SIZE-MAX_INT_STRLEN) { - /* flush */ - o_stream_send(ctx->output, - str_data(ctx->str), str_len(ctx->str)); - str_truncate(ctx->str, 0); - ctx->written = TRUE; - } - - str_printfa(ctx->str, " %u", *((unsigned int *) arr)); - } - - buffer_set_used_size(ctx->sort_buffer, 0); - - if (ctx->string_table != NULL) { - hash_clear(ctx->string_table, TRUE); - p_clear(ctx->str_pool); - } -}
--- a/src/lib-storage/index/Makefile.am Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/Makefile.am Thu Jun 08 15:49:31 2006 +0300 @@ -15,6 +15,7 @@ index-mail-headers.c \ index-mailbox-check.c \ index-search.c \ + index-sort.c \ index-status.c \ index-storage.c \ index-sync.c \ @@ -22,4 +23,5 @@ noinst_HEADERS = \ index-mail.h \ + index-sort.h \ index-storage.h
--- a/src/lib-storage/index/dbox/dbox-storage.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/dbox/dbox-storage.c Thu Jun 08 15:49:31 2006 +0300 @@ -708,7 +708,6 @@ index_mail_alloc, index_header_lookup_init, index_header_lookup_deinit, - index_storage_search_get_sorting, index_storage_search_init, index_storage_search_deinit, index_storage_search_next,
--- a/src/lib-storage/index/index-mail.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/index-mail.c Thu Jun 08 15:49:31 2006 +0300 @@ -873,6 +873,9 @@ struct mail_cache_view *cache_view = mail->trans->cache_view; const struct mail_index_record *rec; + if (data->seq == seq) + return 0; + if (mail_index_lookup(mail->trans->trans_view, seq, &rec) < 0) { mail_storage_set_index_error(mail->ibox); return -1;
--- a/src/lib-storage/index/index-search.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/index-search.c Thu Jun 08 15:49:31 2006 +0300 @@ -1,4 +1,4 @@ -/* Copyright (C) 2002 Timo Sirainen */ +/* Copyright (C) 2002-2006 Timo Sirainen */ #include "lib.h" #include "istream.h" @@ -11,6 +11,7 @@ #include "imap-date.h" #include "index-storage.h" #include "index-mail.h" +#include "index-sort.h" #include "mail-search.h" #include <stdlib.h> @@ -25,6 +26,7 @@ struct index_mailbox *ibox; char *charset; struct mail_search_arg *args; + struct mail_search_sort_program *sort_program; uint32_t seq1, seq2; struct mail *mail; @@ -34,6 +36,7 @@ const char *error; unsigned int failed:1; + unsigned int sorted:1; unsigned int have_seqsets:1; }; @@ -764,14 +767,6 @@ return 0; } -int index_storage_search_get_sorting(struct mailbox *box __attr_unused__, - enum mail_sort_type *sort_program) -{ - /* currently we don't support sorting */ - *sort_program = MAIL_SORT_END; - return 0; -} - struct mail_search_context * index_storage_search_init(struct mailbox_transaction_context *_t, const char *charset, struct mail_search_arg *args, @@ -781,17 +776,13 @@ (struct index_transaction_context *)_t; struct index_search_context *ctx; - if (sort_program != NULL && *sort_program != MAIL_SORT_END) { - i_fatal("BUG: index_storage_search_init(): " - "invalid sort_program"); - } - ctx = i_new(struct index_search_context, 1); ctx->mail_ctx.transaction = _t; ctx->ibox = t->ibox; ctx->view = t->trans_view; ctx->charset = i_strdup(charset); ctx->args = args; + ctx->sort_program = index_sort_program_init(_t, sort_program); mail_search_args_reset(ctx->args, TRUE); @@ -823,6 +814,8 @@ if (ctx->hdr_pool != NULL) pool_unref(ctx->hdr_pool); + if (ctx->sort_program != NULL) + index_sort_program_deinit(&ctx->sort_program); i_free(ctx->charset); i_free(ctx); return ret; @@ -868,6 +861,9 @@ struct mailbox *box = _ctx->transaction->box; int ret; + if (ctx->sorted) + return index_sort_list_next(ctx->sort_program, mail); + ctx->mail = mail; ctx->imail = (struct index_mail *)mail; @@ -885,14 +881,28 @@ if (ctx->error != NULL) ret = -1; - if (ret != 0) - break; + if (ret != 0) { + if (ctx->sort_program == NULL) + break; + + if (index_sort_list_add(ctx->sort_program, mail) < 0) { + ret = -1; + break; + } + } } if (ret < 0) ctx->failed = TRUE; ctx->mail = NULL; ctx->imail = NULL; + if (ctx->sort_program != NULL && ret == 0) { + ctx->sorted = TRUE; + if (index_sort_list_finish(ctx->sort_program) < 0) + return -1; + return index_sort_list_next(ctx->sort_program, mail); + } + return ret; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-storage/index/index-sort.c Thu Jun 08 15:49:31 2006 +0300 @@ -0,0 +1,762 @@ +/* Copyright (C) 2006 Timo Sirainen */ + +/* The idea in here is that for each used primary sort condition there's + a 32bit integer in the index file which specifies the sort order. So when + sorting we simply look up the sort IDs and sort the messages by them. + + Sort IDs are allocated in two ways: + + 1) Time and size fields can be directly used as sort IDs, so we simply + use them directly as the missing sort IDs. + + 2) Strings can't be used as sort IDs directly. The way they're currently + handled is that the whole 32bit integer space is used for them and whenever + adding a string, the available space is halved and the new ID is added in + the middle. For example if we add one mail the first time, it gets ID + 2^31. If we then add two mails which are sorted before the first one, they + get IDs 2^31/3 and 2^31/3*2. Once we run out of the available space between + IDs, a large amount of the IDs are renumbered. + + We try to avoid looking at mails' contents as much as possible. For case 1) + IDs it's simple because we need to get only the new mails' sort fields once + and use them as sort IDs. For case 2) we'll have to start looking at the + strings from older mails as well. To minimize this, we first sort the + existing sort IDs. After that we start inserting the new mails into the + sorted array by looking the position using binary search. This minimizes + the number of lookups we have to do for the old mails. Usually only a few + mails have been added, so this should be faster than other sort methods. +*/ + +#include "lib.h" +#include "array.h" +#include "bsearch-insert-pos.h" +#include "str.h" +#include "message-address.h" +#include "imap-base-subject.h" +#include "index-storage.h" +#include "index-sort.h" + +#include <stdlib.h> + +#define RENUMBER_SPACE 100 + +struct mail_sort_node { + uint32_t seq; + uint32_t sort_id; +}; + +struct mail_search_sort_program { + struct mailbox_transaction_context *t; + enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE]; + const char *primary_sort_header; + struct mail *temp_mail; + + array_t ARRAY_DEFINE(nodes, struct mail_sort_node); + const struct mail_sort_node *nodes_ptr; + unsigned int nodes_count, iter_idx; + + array_t ARRAY_DEFINE(all_nodes, struct mail_sort_node); + + uint32_t ext_id; + uint32_t prev_seq, last_sorted_seq; + + unsigned int reverse:1; + unsigned int skipped_mails:1; + unsigned int sort_ids_added:1; +}; + +struct sort_cmp_context { + struct mail_search_sort_program *program; + struct mail *mail; + + uint32_t cache_seq; + enum mail_sort_type cache_type; + time_t cache_time; + uoff_t cache_size; + const char *cache_str; +}; + +static struct sort_cmp_context static_node_cmp_context; + +struct mail_search_sort_program * +index_sort_program_init(struct mailbox_transaction_context *t, + const enum mail_sort_type *sort_program) +{ + struct index_mailbox *ibox = (struct index_mailbox *)t->box; + struct mail_search_sort_program *program; + const char *name; + unsigned int i; + + if (sort_program == NULL || sort_program[0] == MAIL_SORT_END) + return NULL; + + /* we support internal sorting by the primary condition */ + program = i_new(struct mail_search_sort_program, 1); + program->t = t; + program->temp_mail = mail_alloc(t, 0, NULL); + program->reverse = + (program->sort_program[0] & MAIL_SORT_FLAG_REVERSE) != 0; + ARRAY_CREATE(&program->nodes, default_pool, struct mail_sort_node, 64); + + for (i = 0; i < MAX_SORT_PROGRAM_SIZE; i++) { + program->sort_program[i] = sort_program[i]; + if (sort_program[i] == MAIL_SORT_END) + break; + } + if (i == MAX_SORT_PROGRAM_SIZE) + i_panic("index_sort_program_init(): Invalid sort program"); + + switch (program->sort_program[0] & MAIL_SORT_MASK) { + case MAIL_SORT_ARRIVAL: + name = "rdate"; + break; + case MAIL_SORT_CC: + name = "sort-c"; + program->primary_sort_header = "Cc"; + break; + case MAIL_SORT_DATE: + name = "date"; + break; + case MAIL_SORT_FROM: + name = "sort-f"; + program->primary_sort_header = "From"; + break; + case MAIL_SORT_SIZE: + name = "size"; + break; + case MAIL_SORT_SUBJECT: + name = "sort-s"; + program->primary_sort_header = "Subject"; + break; + case MAIL_SORT_TO: + name = "sort-t"; + program->primary_sort_header = "To"; + break; + default: + i_unreached(); + } + program->ext_id = + mail_index_ext_register(ibox->index, name, 0, + sizeof(uint32_t), sizeof(uint32_t)); + return program; +} + +void index_sort_program_deinit(struct mail_search_sort_program **_program) +{ + struct mail_search_sort_program *program = *_program; + + *_program = NULL; + mail_free(&program->temp_mail); + array_free(&program->nodes); + i_free(program); +} + +static const char *get_first_mailbox(struct mail *mail, const char *header) +{ + struct message_address *addr; + const char *str; + + str = mail_get_first_header(mail, header); + if (str == NULL) + return ""; + + addr = message_address_parse(pool_datastack_create(), + (const unsigned char *)str, + strlen(str), 1); + return addr != NULL ? addr->mailbox : ""; +} + +static const char * +sort_header_get(enum mail_sort_type sort_type, struct mail *mail, uint32_t seq) +{ + const char *str; + + mail_set_seq(mail, seq); + switch (sort_type & MAIL_SORT_MASK) { + case MAIL_SORT_SUBJECT: + str = mail_get_first_header(mail, "Subject"); + return str == NULL ? "" : + imap_get_base_subject_cased(pool_datastack_create(), + str, NULL); + case MAIL_SORT_CC: + return get_first_mailbox(mail, "Cc"); + case MAIL_SORT_FROM: + return get_first_mailbox(mail, "From"); + case MAIL_SORT_TO: + return get_first_mailbox(mail, "To"); + default: + i_unreached(); + } +} + +static int sort_node_cmp_type(struct sort_cmp_context *ctx, + const enum mail_sort_type *sort_program, + const struct mail_sort_node *n1, + const struct mail_sort_node *n2) +{ + enum mail_sort_type sort_type; + const char *str1, *str2; + time_t time1, time2; + uoff_t size1, size2; + int ret; + + sort_type = *sort_program & MAIL_SORT_MASK; + switch (sort_type) { + case MAIL_SORT_CC: + case MAIL_SORT_FROM: + case MAIL_SORT_TO: + case MAIL_SORT_SUBJECT: + t_push(); + str1 = n1->seq == ctx->cache_seq && + ctx->cache_type == sort_type ? ctx->cache_str : + sort_header_get(sort_type, ctx->mail, n1->seq); + str2 = sort_header_get(sort_type, ctx->mail, n2->seq); + + ret = strcasecmp(str1, str2); + t_pop(); + break; + case MAIL_SORT_ARRIVAL: + if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) + time1 = ctx->cache_time; + else { + mail_set_seq(ctx->mail, n1->seq); + time1 = mail_get_received_date(ctx->mail); + if (time1 == (time_t)-1) time1 = 0; + } + + mail_set_seq(ctx->mail, n2->seq); + time2 = mail_get_received_date(ctx->mail); + if (time2 == (time_t)-1) time2 = 0; + + ret = time1 < time2 ? -1 : + (time1 > time2 ? 1 : 0); + break; + case MAIL_SORT_DATE: + if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) + time1 = ctx->cache_time; + else { + mail_set_seq(ctx->mail, n1->seq); + time1 = mail_get_date(ctx->mail, NULL); + if (time1 == (time_t)-1) time1 = 0; + } + + mail_set_seq(ctx->mail, n2->seq); + time2 = mail_get_date(ctx->mail, NULL); + if (time2 == (time_t)-1) time2 = 0; + + ret = time1 < time2 ? -1 : + (time1 > time2 ? 1 : 0); + break; + case MAIL_SORT_SIZE: + if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) + size1 = ctx->cache_size; + else { + mail_set_seq(ctx->mail, n1->seq); + size1 = mail_get_virtual_size(ctx->mail); + if (size1 == (uoff_t)-1) size1 = 0; + } + + mail_set_seq(ctx->mail, n2->seq); + size2 = mail_get_virtual_size(ctx->mail); + if (size2 == (uoff_t)-1) size2 = 0; + + ret = size1 < size2 ? -1 : + (size1 > size2 ? 1 : 0); + break; + case MAIL_SORT_END: + return n1->seq < n2->seq ? -1 : + (n1->seq > n2->seq ? 1 : 0); + case MAIL_SORT_MASK: + case MAIL_SORT_FLAG_REVERSE: + i_unreached(); + } + + if (ret == 0) + return sort_node_cmp_type(ctx, sort_program+1, n1, n2); + + if ((*sort_program & MAIL_SORT_FLAG_REVERSE) != 0) + ret = ret < 0 ? 1 : -1; + return ret; +} + +static int sort_node_cmp(const void *p1, const void *p2) +{ + struct sort_cmp_context *ctx = &static_node_cmp_context; + const struct mail_sort_node *n1 = p1, *n2 = p2; + + if (n1->sort_id < n2->sort_id) + return -1; + if (n1->sort_id > n2->sort_id) + return 1; + + return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2); +} + +static int sort_node_cmp_no_sort_id(const void *p1, const void *p2) +{ + struct sort_cmp_context *ctx = &static_node_cmp_context; + + return sort_node_cmp_type(ctx, ctx->program->sort_program, p1, p2); +} + +static void +index_sort_save_ids(struct mail_search_sort_program *program, + uint32_t first_seq) +{ + struct index_transaction_context *t = + (struct index_transaction_context *)program->t; + const struct mail_sort_node *nodes; + unsigned int i, count; + + nodes = array_get(&program->all_nodes, &count); + for (i = 0; i < count; i++) { + if (nodes[i].seq < first_seq) + continue; + + mail_index_update_ext(t->trans, nodes[i].seq, + program->ext_id, &nodes[i].sort_id, NULL); + } +} + +static int +index_sort_add_ids_range(struct mail_search_sort_program *program, + struct mail *mail, unsigned int idx1, + unsigned int idx2) +{ + struct mail_sort_node *nodes; + unsigned int i, count; + const char *last_str = ""; + uint32_t prev_id = 0, last_id = (uint32_t)-1; + string_t *prev_str; + const char *str; + unsigned int skip; + int ret = 1; + + t_push(); + nodes = array_get_modifyable(&program->all_nodes, &count); + if (nodes[idx2].sort_id != 0) { + i_assert(idx1 != idx2); + last_id = nodes[idx2].sort_id; + + last_str = sort_header_get(program->sort_program[0], mail, + nodes[idx2].seq); + idx2--; + } + + prev_str = t_str_new(256); + if (nodes[idx1].sort_id != 0) { + prev_id = nodes[idx1].sort_id; + + str_append(prev_str, + sort_header_get(program->sort_program[0], mail, + nodes[idx1].seq)); + idx1++; + } + + for (i = idx1; i <= idx2; i++) { + str = sort_header_get(program->sort_program[0], mail, + nodes[i].seq); + + if (i == idx2 && strcasecmp(str, last_str) == 0) + nodes[i].sort_id = last_id; + else if (strcasecmp(str, str_c(prev_str)) == 0 && prev_id != 0) + nodes[i].sort_id = prev_id; + else { + /* divide the available space so that each message gets + an equal sized share. leave the same sized space + also between the first and the last messages */ + skip = (last_id - prev_id) / (idx2 - i + 2); + nodes[i].sort_id = prev_id + skip; + if (nodes[i].sort_id == prev_id) + nodes[i].sort_id++; + if (nodes[i].sort_id == last_id) { + /* we ran out of ID space. have to renumber + the IDs. */ + ret = 0; + break; + } + + prev_id = nodes[i].sort_id; + str_truncate(prev_str, 0); + str_append(prev_str, str); + } + } + t_pop(); + return ret; +} + +static void +index_sort_renumber_ids(struct mail_search_sort_program *program, + unsigned int idx) +{ + struct index_transaction_context *t = + (struct index_transaction_context *)program->t; + struct mail_sort_node *nodes; + unsigned int i, count; + uint32_t sort_id, prev_sort_id, skip; + + nodes = array_get_modifyable(&program->all_nodes, &count); + prev_sort_id = (uint32_t)-1; + sort_id = nodes[idx].sort_id; + i_assert(sort_id == nodes[idx + 1].sort_id); + + if (((uint32_t)-1 - sort_id) / (count - idx + 1) < RENUMBER_SPACE) { + /* space is running out, lets just renumber everything */ + sort_id = 0; + skip = (uint32_t)-1 / (count + 1); + for (i = 0; i < idx; i++) { + if (sort_id != prev_sort_id) + sort_id += skip; + prev_sort_id = nodes[i].sort_id; + + nodes[i].sort_id = sort_id; + mail_index_update_ext(t->trans, nodes[i].seq, + program->ext_id, + &nodes[i].sort_id, NULL); + } + } else { + skip = RENUMBER_SPACE; + } + + for (i = idx; i < count && sort_id >= nodes[i].sort_id; i++) { + if (sort_id != prev_sort_id) { + i_assert(sort_id <= (uint32_t)-1 - skip); + sort_id += skip; + } + prev_sort_id = nodes[i].sort_id; + + if (nodes[i].sort_id != 0) { + nodes[i].sort_id = sort_id; + mail_index_update_ext(t->trans, nodes[i].seq, + program->ext_id, + &nodes[i].sort_id, NULL); + } + } +} + +static void +index_sort_add_ids(struct mail_search_sort_program *program, struct mail *mail) +{ + const struct mail_sort_node *nodes; + unsigned int i, j, count; + + nodes = array_get(&program->all_nodes, &count); + for (i = 0; i < count; i++) { + if (nodes[i].sort_id == 0) { + for (j = i + 1; j < count; j++) { + if (nodes[j].sort_id != 0) + break; + } + if (index_sort_add_ids_range(program, mail, + i == 0 ? 0 : i-1, + I_MIN(j, count-1)) == 0) + index_sort_renumber_ids(program, i); + } + } +} + +static uint32_t get_sort_id_arrival(struct mail *mail) +{ + time_t time; + + time = mail_get_received_date(mail); + return time == (time_t)-1 ? 0 : time; +} + +static uint32_t get_sort_id_date(struct mail *mail) +{ + time_t time; + + time = mail_get_date(mail, NULL); + return time == (time_t)-1 ? 0 : time; +} + +static uint32_t get_sort_id_size(struct mail *mail) +{ + uoff_t size; + + size = mail_get_virtual_size(mail); + if (size == (uoff_t)-1) + return 0; + + /* FIXME: elsewhere we support 64bit message sizes, but here + we support only 32bit sizes.. It's a bit too much trouble + to support 64bit here currently, so until such messages + actually start showing up somewhere, 32bit is enough */ + i_assert(size <= (uint32_t)-1); + return size == size; +} + +static void index_sort_preset_sort_ids(struct mail_search_sort_program *program, + uint32_t last_seq) +{ + struct mail_sort_node node; + struct mail *mail; + uint32_t (*get_sort_id)(struct mail *); + + switch (program->sort_program[0] & MAIL_SORT_MASK) { + case MAIL_SORT_ARRIVAL: + get_sort_id = get_sort_id_arrival; + break; + case MAIL_SORT_DATE: + get_sort_id = get_sort_id_date; + break; + case MAIL_SORT_SIZE: + get_sort_id = get_sort_id_size; + break; + default: + i_unreached(); + } + + /* add the missing nodes with their sort_ids */ + mail = program->temp_mail; + node.seq = array_count(&program->all_nodes) + 1; + for (; node.seq <= last_seq; node.seq++) { + mail_set_seq(mail, node.seq); + node.sort_id = get_sort_id(mail); + array_append(&program->all_nodes, &node, 1); + } + + /* @UNSAFE: and sort them */ + memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); + static_node_cmp_context.program = program; + static_node_cmp_context.mail = mail; + + qsort(array_idx_modifyable(&program->all_nodes, 0), last_seq, + sizeof(struct mail_sort_node), sort_node_cmp); +} + +static void index_sort_cache_seq(struct sort_cmp_context *ctx, + enum mail_sort_type sort_type, uint32_t seq) +{ + ctx->cache_seq = seq; + ctx->cache_type = sort_type & MAIL_SORT_MASK; + + mail_set_seq(ctx->mail, seq); + switch (ctx->cache_type) { + case MAIL_SORT_ARRIVAL: + ctx->cache_time = get_sort_id_arrival(ctx->mail); + break; + case MAIL_SORT_DATE: + ctx->cache_time = get_sort_id_date(ctx->mail); + break; + case MAIL_SORT_SIZE: + ctx->cache_size = get_sort_id_size(ctx->mail); + break; + default: + ctx->cache_str = sort_header_get(sort_type, ctx->mail, seq); + break; + } +} + +static void index_sort_headers(struct mail_search_sort_program *program, + uint32_t last_seq) +{ + struct mail_sort_node *nodes, node; + const struct mail_sort_node *cnodes, *pos; + unsigned int count; + + /* we wish to avoid reading the actual headers as much as possible. + first sort the nodes which already have sort_ids, then start + inserting the new nodes by finding their insertion position with + binary search */ + memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); + static_node_cmp_context.program = program; + static_node_cmp_context.mail = program->temp_mail; + + /* @UNSAFE */ + nodes = array_get_modifyable(&program->all_nodes, &count); + if (program->last_sorted_seq != count) { + qsort(nodes, count, sizeof(struct mail_sort_node), + sort_node_cmp); + } + + node.sort_id = 0; + for (node.seq = count + 1; node.seq <= last_seq; node.seq++) { + index_sort_cache_seq(&static_node_cmp_context, + program->sort_program[0], node.seq); + + cnodes = array_get_modifyable(&program->nodes, &count); + pos = bsearch_insert_pos(&node, cnodes, count, sizeof(*cnodes), + sort_node_cmp_no_sort_id); + array_insert(&program->nodes, pos - cnodes, &node, 1); + } + + index_sort_add_ids(program, static_node_cmp_context.mail); +} + +static int index_sort_build(struct mail_search_sort_program *program, + uint32_t last_seq) +{ + struct index_mailbox *ibox = (struct index_mailbox *)program->t->box; + struct mail_sort_node node; + const void *data; + unsigned int i, first_missing_sort_id_seq; + + i = array_count(&program->all_nodes); + if (i == 0) { + /* we're building the array from scratch. add here only the + messages that have sort_ids set. */ + program->last_sorted_seq = 0; + for (; i < last_seq; i++) { + node.seq = i+1; + + if (mail_index_lookup_ext(ibox->view, i+1, + program->ext_id, &data) < 0) + return -1; + + node.sort_id = data == NULL ? 0 : + *(const uint32_t *)data; + if (node.sort_id == 0) { + /* the rest don't have sort_ids either */ + break; + } + array_append(&program->all_nodes, &node, 1); + } + } + first_missing_sort_id_seq = i + 1; + + switch (program->sort_program[0] & MAIL_SORT_MASK) { + case MAIL_SORT_ARRIVAL: + case MAIL_SORT_DATE: + case MAIL_SORT_SIZE: + index_sort_preset_sort_ids(program, last_seq); + break; + default: + index_sort_headers(program, last_seq); + break; + } + index_sort_save_ids(program, first_missing_sort_id_seq); + return 0; +} + +static void index_sort_add_node(struct mail_search_sort_program *program, + const struct mail_sort_node *node) +{ + const struct mail_sort_node *nodes, *pos; + unsigned int count; + + memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); + static_node_cmp_context.program = program; + static_node_cmp_context.mail = program->temp_mail; + + nodes = array_get(&program->nodes, &count); + pos = bsearch_insert_pos(node, nodes, count, + sizeof(*node), sort_node_cmp); + array_insert(&program->nodes, pos - nodes, node, 1); + + program->last_sorted_seq = node->seq; + program->prev_seq = node->seq; +} + +int index_sort_list_add(struct mail_search_sort_program *program, + struct mail *mail) +{ + struct index_transaction_context *t = + (struct index_transaction_context *)program->t; + const struct mail_index_header *hdr; + const void *data; + struct mail_sort_node node; + int ret; + + i_assert(mail->transaction == program->t); + + if (program->prev_seq + 1 != mail->seq) + program->skipped_mails = TRUE; + + node.seq = mail->seq; + if (program->last_sorted_seq == program->prev_seq) { + /* we're still on the fast path using sort_ids from the + index file */ + if (mail_index_lookup_ext(t->trans_view, mail->seq, + program->ext_id, &data) < 0) + return -1; + + node.sort_id = data == NULL ? 0 : *(const uint32_t *)data; + if (node.sort_id != 0) { + index_sort_add_node(program, &node); + return 0; + } + i_assert(!program->sort_ids_added); + } else { + node.sort_id = 0; + } + + /* sort_ids are missing, have to generate them */ + if (!program->skipped_mails) { + /* as long as we think we're returning all the mails sorted, + which is the common case, we want to avoid duplicating the + node array. so here we just keep counting the sequences + until either we skip a sequence or we reach list_finish() */ + program->prev_seq = mail->seq; + return 0; + } + + /* we're not returning all the mails. have to create a temporary array + for all the nodes so we can set all the missing sort_ids. */ + hdr = mail_index_get_header(t->ibox->view); + ARRAY_CREATE(&program->all_nodes, default_pool, + struct mail_sort_node, hdr->messages_count); + ret = index_sort_build(program, hdr->messages_count); + array_free(&program->all_nodes); + if (ret < 0) + return -1; + + /* add the nodes in the middle */ + node.seq = program->last_sorted_seq + 1; + for (; node.seq <= program->prev_seq; node.seq++) { + if (mail_index_lookup_ext(t->trans_view, mail->seq, + program->ext_id, &data) < 0) + return -1; + + node.sort_id = *(const uint32_t *)data; + i_assert(node.sort_id != 0); + + index_sort_add_node(program, &node); + } + + /* and add this last node */ + program->sort_ids_added = TRUE; + return index_sort_list_add(program, mail); +} + +int index_sort_list_finish(struct mail_search_sort_program *program) +{ + if (program->last_sorted_seq != program->prev_seq) { + /* nodes array contains a contiguous range of sequences from + the beginning, with the last ones missing sort_id. we can + just sort the array directly without copying it. */ + i_assert(!program->sort_ids_added); + + program->all_nodes = program->nodes; + if (index_sort_build(program, program->prev_seq) < 0) + return -1; + } + + program->nodes_ptr = + array_get(&program->nodes, &program->nodes_count); + + if (program->reverse) + program->iter_idx = program->nodes_count; + return 0; +} + +int index_sort_list_next(struct mail_search_sort_program *program, + struct mail *mail) +{ + const struct mail_sort_node *node; + + if (!program->reverse) { + if (program->iter_idx == program->nodes_count) + return 0; + + node = &program->nodes_ptr[program->iter_idx++]; + } else { + if (program->iter_idx == 0) + return 0; + + node = &program->nodes_ptr[--program->iter_idx]; + } + mail_set_seq(mail, node->seq); + return 1; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/lib-storage/index/index-sort.h Thu Jun 08 15:49:31 2006 +0300 @@ -0,0 +1,18 @@ +#ifndef __INDEX_SORT_H +#define __INDEX_SORT_H + +struct mail_search_sort_program; + +struct mail_search_sort_program * +index_sort_program_init(struct mailbox_transaction_context *t, + const enum mail_sort_type *sort_program); +void index_sort_program_deinit(struct mail_search_sort_program **program); + +int index_sort_list_add(struct mail_search_sort_program *program, + struct mail *mail); +int index_sort_list_finish(struct mail_search_sort_program *program); + +int index_sort_list_next(struct mail_search_sort_program *program, + struct mail *mail); + +#endif
--- a/src/lib-storage/index/index-storage.h Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/index-storage.h Thu Jun 08 15:49:31 2006 +0300 @@ -168,8 +168,6 @@ index_header_lookup_init(struct mailbox *box, const char *const headers[]); void index_header_lookup_deinit(struct mailbox_header_lookup_ctx *ctx); -int index_storage_search_get_sorting(struct mailbox *box, - enum mail_sort_type *sort_program); struct mail_search_context * index_storage_search_init(struct mailbox_transaction_context *t, const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/index/maildir/maildir-storage.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/maildir/maildir-storage.c Thu Jun 08 15:49:31 2006 +0300 @@ -966,7 +966,6 @@ index_mail_alloc, index_header_lookup_init, index_header_lookup_deinit, - index_storage_search_get_sorting, index_storage_search_init, index_storage_search_deinit, index_storage_search_next,
--- a/src/lib-storage/index/mbox/mbox-storage.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/index/mbox/mbox-storage.c Thu Jun 08 15:49:31 2006 +0300 @@ -1121,7 +1121,6 @@ index_mail_alloc, index_header_lookup_init, index_header_lookup_deinit, - index_storage_search_get_sorting, index_storage_search_init, index_storage_search_deinit, index_storage_search_next,
--- a/src/lib-storage/mail-storage-private.h Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/mail-storage-private.h Thu Jun 08 15:49:31 2006 +0300 @@ -131,8 +131,6 @@ const char *const headers[]); void (*header_lookup_deinit)(struct mailbox_header_lookup_ctx *ctx); - int (*search_get_sorting)(struct mailbox *box, - enum mail_sort_type *sort_program); struct mail_search_context * (*search_init)(struct mailbox_transaction_context *t, const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/mail-storage.c Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/mail-storage.c Thu Jun 08 15:49:31 2006 +0300 @@ -444,12 +444,6 @@ ctx->box->v.header_lookup_deinit(ctx); } -int mailbox_search_get_sorting(struct mailbox *box, - enum mail_sort_type *sort_program) -{ - return box->v.search_get_sorting(box, sort_program); -} - struct mail_search_context * mailbox_search_init(struct mailbox_transaction_context *t, const char *charset, struct mail_search_arg *args,
--- a/src/lib-storage/mail-storage.h Wed Jun 07 12:05:05 2006 +0300 +++ b/src/lib-storage/mail-storage.h Thu Jun 08 15:49:31 2006 +0300 @@ -86,18 +86,19 @@ }; enum mail_sort_type { -/* Maximum size for sort program, 2x for reverse + END */ -#define MAX_SORT_PROGRAM_SIZE (2*7 + 1) +/* Maximum size for sort program (each one separately + END) */ +#define MAX_SORT_PROGRAM_SIZE (7 + 1) - MAIL_SORT_ARRIVAL = 0x0010, - MAIL_SORT_CC = 0x0020, - MAIL_SORT_DATE = 0x0040, - MAIL_SORT_FROM = 0x0080, - MAIL_SORT_SIZE = 0x0100, - MAIL_SORT_SUBJECT = 0x0200, - MAIL_SORT_TO = 0x0400, + MAIL_SORT_ARRIVAL = 0x0001, + MAIL_SORT_CC = 0x0002, + MAIL_SORT_DATE = 0x0004, + MAIL_SORT_FROM = 0x0008, + MAIL_SORT_SIZE = 0x0010, + MAIL_SORT_SUBJECT = 0x0020, + MAIL_SORT_TO = 0x0040, - MAIL_SORT_REVERSE = 0x0001, /* reverse the next type */ + MAIL_SORT_MASK = 0x0fff, + MAIL_SORT_FLAG_REVERSE = 0x1000, /* reverse this mask type */ MAIL_SORT_END = 0x0000 /* ends sort program */ }; @@ -373,17 +374,9 @@ mailbox_header_lookup_init(struct mailbox *box, const char *const headers[]); void mailbox_header_lookup_deinit(struct mailbox_header_lookup_ctx **ctx); -/* Modify sort_program to specify a sort program acceptable for - search_init(). If mailbox supports no sorting, it's simply set to - {MAIL_SORT_END}. */ -int mailbox_search_get_sorting(struct mailbox *box, - enum mail_sort_type *sort_program); -/* Initialize new search request. Search arguments are given so that - the storage can optimize the searching as it wants. - - If sort_program is non-NULL, it requests that the returned messages - are sorted by the given criteria. sort_program must have gone - through search_get_sorting(). */ +/* Initialize new search request. charset specifies the character set used in + the search argument strings. If sort_program is non-NULL, the messages are + returned in the requested order, otherwise from first to last. */ struct mail_search_context * mailbox_search_init(struct mailbox_transaction_context *t, const char *charset, struct mail_search_arg *args,