Mercurial > dovecot > original-hg > dovecot-1.2
changeset 6968:09e1e8d4aa53 HEAD
Simplified and optimized the sorting code.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 08 Dec 2007 21:36:20 +0200 |
parents | 4c3002f3cd51 |
children | c13473a5c0e4 |
files | src/lib-storage/index/index-sort.c |
diffstat | 1 files changed, 137 insertions(+), 242 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-storage/index/index-sort.c Sat Dec 08 21:28:46 2007 +0200 +++ b/src/lib-storage/index/index-sort.c Sat Dec 08 21:36:20 2007 +0200 @@ -53,28 +53,21 @@ const char *primary_sort_header; struct mail *temp_mail; - ARRAY_TYPE(mail_sort_node) nodes; + ARRAY_TYPE(mail_sort_node) nodes, all_nodes; const struct mail_sort_node *nodes_ptr; unsigned int nodes_count, iter_idx; - ARRAY_TYPE(mail_sort_node) all_nodes; - uint32_t ext_id; - uint32_t prev_seq, last_sorted_seq; + unsigned int first_missing_sort_id_idx; unsigned int reverse:1; - unsigned int skipped_mails:1; unsigned int sort_ids_added:1; + unsigned int missing_sort_ids:1; }; struct sort_cmp_context { struct mail_search_sort_program *program; struct mail *mail; - - uint32_t cache_seq; - enum mail_sort_type cache_type; - uint32_t cache_value; - const char *cache_str; }; static struct sort_cmp_context static_node_cmp_context; @@ -168,19 +161,21 @@ return addr != NULL ? addr->mailbox : ""; } -static const char * -sort_header_get(enum mail_sort_type sort_type, struct mail *mail, uint32_t seq) +static void +sort_header_get(string_t *dest, enum mail_sort_type sort_type, + struct mail *mail, uint32_t seq) { const char *str; - string_t *buf; mail_set_seq(mail, seq); switch (sort_type & MAIL_SORT_MASK) { case MAIL_SORT_SUBJECT: if (mail_get_first_header(mail, "Subject", &str) <= 0) - return ""; - return imap_get_base_subject_cased(pool_datastack_create(), - str, NULL); + return; + str = imap_get_base_subject_cased(pool_datastack_create(), + str, NULL); + str_append(dest, str); + return; case MAIL_SORT_CC: str = get_first_mailbox(mail, "Cc"); break; @@ -194,9 +189,7 @@ i_unreached(); } - buf = t_str_new(128); - (void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, buf); - return str_c(buf); + (void)uni_utf8_to_decomposed_titlecase(str, (size_t)-1, dest); } static uint32_t sort_get_arrival(struct mail *mail) @@ -259,23 +252,19 @@ case MAIL_SORT_TO: case MAIL_SORT_SUBJECT: T_FRAME_BEGIN { - const char *str1, *str2; + string_t *str1, *str2; - str1 = n1->seq == ctx->cache_seq && - ctx->cache_type == sort_type ? ctx->cache_str : - sort_header_get(sort_type, ctx->mail, n1->seq); - str2 = sort_header_get(sort_type, ctx->mail, n2->seq); + str1 = t_str_new(256); + str2 = t_str_new(256); + sort_header_get(str1, sort_type, ctx->mail, n1->seq); + sort_header_get(str2, sort_type, ctx->mail, n2->seq); - ret = strcmp(str1, str2); + ret = strcmp(str_c(str1), str_c(str2)); } T_FRAME_END; break; case MAIL_SORT_ARRIVAL: - if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) - time1 = ctx->cache_value; - else { - mail_set_seq(ctx->mail, n1->seq); - time1 = sort_get_arrival(ctx->mail); - } + mail_set_seq(ctx->mail, n1->seq); + time1 = sort_get_arrival(ctx->mail); mail_set_seq(ctx->mail, n2->seq); time2 = sort_get_arrival(ctx->mail); @@ -284,12 +273,8 @@ (time1 > time2 ? 1 : 0); break; case MAIL_SORT_DATE: - if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) - time1 = ctx->cache_value; - else { - mail_set_seq(ctx->mail, n1->seq); - time1 = sort_get_date(ctx->mail); - } + mail_set_seq(ctx->mail, n1->seq); + time1 = sort_get_date(ctx->mail); mail_set_seq(ctx->mail, n2->seq); time2 = sort_get_date(ctx->mail); @@ -298,12 +283,8 @@ (time1 > time2 ? 1 : 0); break; case MAIL_SORT_SIZE: - if (n1->seq == ctx->cache_seq && ctx->cache_type == sort_type) - size1 = ctx->cache_value; - else { - mail_set_seq(ctx->mail, n1->seq); - size1 = sort_get_size(ctx->mail); - } + mail_set_seq(ctx->mail, n1->seq); + size1 = sort_get_size(ctx->mail); mail_set_seq(ctx->mail, n2->seq); size2 = sort_get_size(ctx->mail); @@ -341,31 +322,24 @@ return sort_node_cmp_type(ctx, ctx->program->sort_program + 1, n1, n2); } -static int sort_node_cmp_no_sort_id(const void *p1, const void *p2) +static int sort_node_cmp_nozero_sort_id(const void *p1, const void *p2) { struct sort_cmp_context *ctx = &static_node_cmp_context; - - return sort_node_cmp_type(ctx, ctx->program->sort_program, p1, p2); -} + const struct mail_sort_node *n1 = p1, *n2 = p2; + const enum mail_sort_type *sort_program; -static void -index_sort_save_ids(struct mail_search_sort_program *program, - uint32_t first_seq) -{ - struct index_transaction_context *t = - (struct index_transaction_context *)program->t; - const struct mail_sort_node *nodes; - unsigned int i, count; + /* Use sort IDs only if both have them */ + if (n1->sort_id != 0 && n2->sort_id != 0) { + if (n1->sort_id < n2->sort_id) + return -1; + if (n1->sort_id > n2->sort_id) + return 1; + sort_program = ctx->program->sort_program + 1; + } else { + sort_program = ctx->program->sort_program; + } - nodes = array_get(&program->all_nodes, &count); - for (i = 0; i < count; i++) { - if (nodes[i].seq < first_seq) - continue; - - i_assert(nodes[i].sort_id != 0); - mail_index_update_ext(t->trans, nodes[i].seq, - program->ext_id, &nodes[i].sort_id, NULL); - } + return sort_node_cmp_type(ctx, ctx->program->sort_program, n1, n2); } static bool @@ -375,19 +349,18 @@ { struct mail_sort_node *nodes; unsigned int i, count; - const char *last_str = ""; uint32_t prev_id = 0, last_id = (uint32_t)-1; - string_t *prev_str; - const char *str; + string_t *last_str, *prev_str, *str; unsigned int skip; + last_str = t_str_new(256); nodes = array_get_modifiable(&program->all_nodes, &count); if (nodes[idx2].sort_id != 0) { i_assert(idx1 != idx2); last_id = nodes[idx2].sort_id; - last_str = sort_header_get(program->sort_program[0], mail, - nodes[idx2].seq); + sort_header_get(last_str, program->sort_program[0], mail, + nodes[idx2].seq); idx2--; } @@ -395,19 +368,21 @@ if (nodes[idx1].sort_id != 0) { prev_id = nodes[idx1].sort_id; - str_append(prev_str, - sort_header_get(program->sort_program[0], mail, - nodes[idx1].seq)); + sort_header_get(prev_str, program->sort_program[0], mail, + nodes[idx1].seq); idx1++; } + str = str_new(default_pool, 256); for (i = idx1; i <= idx2; i++) { - str = sort_header_get(program->sort_program[0], mail, - nodes[i].seq); + T_FRAME( + sort_header_get(str, program->sort_program[0], mail, + nodes[i].seq); + ); - if (i == idx2 && strcmp(str, last_str) == 0) + if (i == idx2 && str_equals(str, last_str)) nodes[i].sort_id = last_id; - else if (strcmp(str, str_c(prev_str)) == 0 && prev_id != 0) + else if (prev_id != 0 && str_equals(str, prev_str) == 0) nodes[i].sort_id = prev_id; else { /* divide the available space so that each message gets @@ -420,14 +395,16 @@ if (nodes[i].sort_id == last_id) { /* we ran out of ID space. have to renumber the IDs. */ + str_free(&str); return FALSE; } prev_id = nodes[i].sort_id; str_truncate(prev_str, 0); - str_append(prev_str, str); + str_append_str(prev_str, str); } } + str_free(&str); return TRUE; } @@ -516,7 +493,6 @@ uint32_t last_seq) { struct mail_sort_node node; - struct mail *mail; uint32_t (*get_sort_id)(struct mail *); switch (program->sort_program[0] & MAIL_SORT_MASK) { @@ -534,114 +510,77 @@ } /* add the missing nodes with their sort_ids */ - mail = program->temp_mail; node.seq = array_count(&program->all_nodes) + 1; for (; node.seq <= last_seq; node.seq++) { - mail_set_seq(mail, node.seq); - node.sort_id = get_sort_id(mail); + mail_set_seq(program->temp_mail, node.seq); + node.sort_id = get_sort_id(program->temp_mail); + i_assert(node.sort_id != 0); array_append(&program->all_nodes, &node, 1); } - - /* @UNSAFE: and sort them */ - memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); - static_node_cmp_context.program = program; - static_node_cmp_context.mail = mail; - - qsort(array_idx_modifiable(&program->all_nodes, 0), last_seq, - sizeof(struct mail_sort_node), sort_node_cmp); -} - -static void index_sort_cache_seq(struct sort_cmp_context *ctx, - enum mail_sort_type sort_type, uint32_t seq) -{ - ctx->cache_seq = seq; - ctx->cache_type = sort_type & MAIL_SORT_MASK; - - mail_set_seq(ctx->mail, seq); - switch (ctx->cache_type) { - case MAIL_SORT_ARRIVAL: - ctx->cache_value = sort_get_arrival(ctx->mail); - break; - case MAIL_SORT_DATE: - ctx->cache_value = sort_get_date(ctx->mail); - break; - case MAIL_SORT_SIZE: - ctx->cache_value = sort_get_size(ctx->mail); - break; - default: - ctx->cache_str = sort_header_get(sort_type, ctx->mail, seq); - break; - } } static void index_sort_headers(struct mail_search_sort_program *program, uint32_t last_seq) { - struct mail_sort_node *nodes, node; - const struct mail_sort_node *cnodes; - unsigned int count, idx; + ARRAY_TYPE(mail_sort_node) seq_nodes_arr; + struct mail_sort_node *nodes, node, *seq_nodes; + unsigned int i, count, count2; - /* we wish to avoid reading the actual headers as much as possible. - first sort the nodes which already have sort_ids, then start - inserting the new nodes by finding their insertion position with - binary search */ - memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); - static_node_cmp_context.program = program; - static_node_cmp_context.mail = program->temp_mail; + /* insert missing nodes */ + node.seq = array_count(&program->all_nodes) + 1; + for (; node.seq <= last_seq; node.seq++) + array_append(&program->all_nodes, &node, 1); - /* @UNSAFE */ + /* sort everything. use sort_ids whenever possible */ nodes = array_get_modifiable(&program->all_nodes, &count); - if (program->last_sorted_seq != count) { - qsort(nodes, count, sizeof(struct mail_sort_node), - sort_node_cmp); - } + i_assert(count == last_seq); + qsort(nodes, count, sizeof(struct mail_sort_node), + sort_node_cmp_nozero_sort_id); + + /* we can now build the sort_ids */ + index_sort_add_ids(program, static_node_cmp_context.mail); - node.sort_id = 0; - for (node.seq = count + 1; node.seq <= last_seq; node.seq++) { - index_sort_cache_seq(&static_node_cmp_context, - program->sort_program[0], node.seq); + /* @UNSAFE: and finally get the range sorted back by sequence */ + i_array_init(&seq_nodes_arr, count); + (void)array_idx_modifiable(&seq_nodes_arr, count-1); + seq_nodes = array_get_modifiable(&seq_nodes_arr, &count2); + i_assert(count2 == count); + for (i = 0; i < count; i++) + seq_nodes[nodes[i].seq-1] = nodes[i]; - cnodes = array_get_modifiable(&program->all_nodes, &count); - bsearch_insert_pos(&node, cnodes, count, sizeof(*cnodes), - sort_node_cmp_no_sort_id, - &idx); - array_insert(&program->all_nodes, idx, &node, 1); - } - - index_sort_add_ids(program, static_node_cmp_context.mail); + array_free(&program->all_nodes); + program->all_nodes = seq_nodes_arr; } -static void index_sort_build(struct mail_search_sort_program *program, - uint32_t last_seq) +static void index_sort_build(struct mail_search_sort_program *program) { - struct index_mailbox *ibox = (struct index_mailbox *)program->t->box; - struct mail_sort_node node; + struct index_transaction_context *t = + (struct index_transaction_context *)program->t; + struct mail_sort_node node, *all_nodes, *nodes; const void *data; - unsigned int i, first_missing_sort_id_seq; + uint32_t last_seq; + unsigned int seq, i, count, count2; - i = array_count(&program->all_nodes); - if (i == 0) { - /* we're building the array from scratch. add here only the - messages that have sort_ids set. */ - program->last_sorted_seq = 0; - for (; i < last_seq; i++) { - node.seq = i+1; - - mail_index_lookup_ext(ibox->view, i+1, program->ext_id, - &data, NULL); + /* add messages that have sort_ids. they're always at the beginning + of the mailbox. */ + last_seq = mail_index_view_get_messages_count(t->ibox->view); + i_array_init(&program->all_nodes, last_seq); + for (seq = 1; seq <= last_seq; seq++) { + node.seq = seq; - node.sort_id = data == NULL ? 0 : - *(const uint32_t *)data; - if (node.sort_id == 0) { - /* the rest don't have sort_ids either */ - break; - } - array_append(&program->all_nodes, &node, 1); + mail_index_lookup_ext(t->ibox->view, seq, program->ext_id, + &data, NULL); + node.sort_id = data == NULL ? 0 : *(const uint32_t *)data; + if (node.sort_id == 0) { + /* the rest don't have sort_ids either */ + break; } + array_append(&program->all_nodes, &node, 1); } - first_missing_sort_id_seq = i + 1; + i_assert(seq <= last_seq); + /* add the new sort_ids and sort them all */ switch (program->sort_program[0] & MAIL_SORT_MASK) { case MAIL_SORT_ARRIVAL: case MAIL_SORT_DATE: @@ -652,27 +591,25 @@ index_sort_headers(program, last_seq); break; } - index_sort_save_ids(program, first_missing_sort_id_seq); -} - -static void index_sort_add_node(struct mail_search_sort_program *program, - const struct mail_sort_node *node) -{ - const struct mail_sort_node *nodes; - unsigned int count, idx; - memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); - static_node_cmp_context.program = program; - static_node_cmp_context.mail = program->temp_mail; + /* add the missing sort IDs to index. also update sort_id in + wanted nodes. */ + all_nodes = array_get_modifiable(&program->all_nodes, &count); + nodes = array_get_modifiable(&program->nodes, &count2); + i = program->first_missing_sort_id_idx; + i_assert(nodes[i].seq <= seq); + for (; seq <= count; seq++) { + i_assert(all_nodes[seq-1].seq == seq); + i_assert(all_nodes[seq-1].sort_id != 0); + if (nodes[i].seq == seq) { + nodes[i].sort_id = all_nodes[seq-1].sort_id; + i++; + } - nodes = array_get(&program->nodes, &count); - bsearch_insert_pos(node, nodes, count, - sizeof(*node), sort_node_cmp, - &idx); - array_insert(&program->nodes, idx, node, 1); - - program->last_sorted_seq = node->seq; - program->prev_seq = node->seq; + mail_index_update_ext(t->trans, seq, program->ext_id, + &all_nodes[seq-1].sort_id, NULL); + } + array_free(&program->all_nodes); } void index_sort_list_add(struct mail_search_sort_program *program, @@ -680,82 +617,40 @@ { struct index_transaction_context *t = (struct index_transaction_context *)program->t; - const struct mail_index_header *hdr; const void *data; struct mail_sort_node node; - uint32_t last_seq; i_assert(mail->transaction == program->t); - if (program->prev_seq + 1 != mail->seq) - program->skipped_mails = TRUE; - + mail_index_lookup_ext(t->trans_view, mail->seq, + program->ext_id, &data, NULL); node.seq = mail->seq; - if (program->last_sorted_seq == program->prev_seq) { - /* we're still on the fast path using sort_ids from the - index file */ - mail_index_lookup_ext(t->trans_view, mail->seq, - program->ext_id, &data, NULL); - node.sort_id = data == NULL ? 0 : *(const uint32_t *)data; - if (node.sort_id != 0) { - index_sort_add_node(program, &node); - return; - } - i_assert(!program->sort_ids_added); - } else { - node.sort_id = 0; - } + node.sort_id = data == NULL ? 0 : *(const uint32_t *)data; - /* sort_ids are missing, have to generate them */ - if (!program->skipped_mails) { - /* as long as we think we're returning all the mails sorted, - which is the common case, we want to avoid duplicating the - node array. so here we just keep counting the sequences - until either we skip a sequence or we reach list_finish() */ - program->prev_seq = mail->seq; - return; + if (node.sort_id == 0 && !program->missing_sort_ids) { + program->missing_sort_ids = TRUE; + program->first_missing_sort_id_idx = + array_count(&program->nodes); } - - /* we're not returning all the mails. have to create a temporary array - for all the nodes so we can set all the missing sort_ids. */ - hdr = mail_index_get_header(t->ibox->view); - i_array_init(&program->all_nodes, hdr->messages_count); - index_sort_build(program, hdr->messages_count); - array_free(&program->all_nodes); - - /* add the nodes in the middle */ - node.seq = program->last_sorted_seq + 1; - last_seq = program->prev_seq; - for (; node.seq <= last_seq; node.seq++) { - mail_index_lookup_ext(t->trans_view, mail->seq, program->ext_id, - &data, NULL); - - node.sort_id = *(const uint32_t *)data; - i_assert(node.sort_id != 0); - - index_sort_add_node(program, &node); - } - - /* and add this last node */ - program->sort_ids_added = TRUE; - index_sort_list_add(program, mail); + array_append(&program->nodes, &node, 1); } void index_sort_list_finish(struct mail_search_sort_program *program) { - if (program->last_sorted_seq != program->prev_seq) { - /* nodes array contains a contiguous range of sequences from - the beginning, with the last ones missing sort_id. we can - just sort the array directly without copying it. */ - i_assert(!program->sort_ids_added); + struct mail_sort_node *nodes; + + memset(&static_node_cmp_context, 0, sizeof(static_node_cmp_context)); + static_node_cmp_context.program = program; + static_node_cmp_context.mail = program->temp_mail; - program->all_nodes = program->nodes; - index_sort_build(program, program->prev_seq); - } + if (program->missing_sort_ids) + index_sort_build(program); - program->nodes_ptr = - array_get(&program->nodes, &program->nodes_count); + nodes = array_get_modifiable(&program->nodes, &program->nodes_count); + qsort(nodes, program->nodes_count, sizeof(struct mail_sort_node), + sort_node_cmp); + program->nodes_ptr = nodes; if (program->reverse) program->iter_idx = program->nodes_count; }