Mercurial > dovecot > core-2.2
view src/imap/imap-sort.c @ 988:8028c4dcf38f HEAD
mail-storage.h interface changes, affects pretty much everything.
FETCH, SEARCH, SORT and THREAD handling were pretty much moved from
lib-storage/ to imap/ so adding non-index storages would be much easier now.
Also POP3 server can now be easily implemented with lib-storage.
Not too well tested, and at least one major problem: partial fetching is
_slow_.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 20 Jan 2003 16:52:51 +0200 |
parents | |
children | 60646878858e |
line wrap: on
line source
/* Copyright (C) 2002 Timo Sirainen */ /* Implementation of draft-ietf-imapext-sort-10 sorting algorithm. Pretty messy code actually, adding any sort types requires care. This is pretty fast however and takes only as much memory as needed to be reasonably fast. */ #include "common.h" #include "buffer.h" #include "hash.h" #include "ostream.h" #include "str.h" #include "imap-base-subject.h" #include "imap-sort.h" #include <stdlib.h> #define MAX_WANTED_HEADERS 10 #define STRBUF_SIZE 1024 #define IS_SORT_STRING(type) \ ((type) == MAIL_SORT_CC || (type) == MAIL_SORT_FROM || \ (type) == MAIL_SORT_SUBJECT || (type) == MAIL_SORT_TO) #define IS_SORT_TIME(type) \ ((type) == MAIL_SORT_ARRIVAL || (type) == MAIL_SORT_DATE) struct sort_context { struct mail_search_context *search_ctx; enum mail_sort_type sort_program[MAX_SORT_PROGRAM_SIZE]; enum mail_sort_type common_mask, cache_mask; struct mailbox *box; struct ostream *output; string_t *str; buffer_t *sort_buffer; size_t sort_element_size; pool_t temp_pool, str_pool; struct hash_table *string_table; time_t last_arrival, last_date; uoff_t last_size; char *last_cc, *last_from, *last_subject, *last_to; int written, id_is_uid; }; static void mail_sort_input(struct sort_context *ctx, struct mail *mail); static void mail_sort_flush(struct sort_context *ctx); static enum mail_sort_type mail_sort_normalize(const enum mail_sort_type *input, buffer_t *output) { enum mail_sort_type type, mask = 0; int pos, reverse; reverse = FALSE; for (pos = 0; *input != MAIL_SORT_END; input++) { if (*input == MAIL_SORT_REVERSE) reverse = !reverse; else { if ((mask & *input) == 0) { if (reverse) { type = MAIL_SORT_REVERSE; buffer_append(output, &type, sizeof(type)); } buffer_append(output, input, sizeof(*input)); mask |= *input; } reverse = FALSE; } } type = MAIL_SORT_END; buffer_append(output, &type, sizeof(type)); return mask; } static enum mail_sort_type mail_sort_get_common_mask(const enum mail_sort_type *sort1, const enum mail_sort_type *sort2, unsigned int *count) { enum mail_sort_type mask = 0; *count = 0; while (*sort1 == *sort2 && *sort1 != MAIL_SORT_END) { if (*sort1 != MAIL_SORT_REVERSE) mask |= *sort1; sort1++; sort2++; (*count)++; } return mask; } static enum mail_fetch_field init_sort_elements(struct sort_context *ctx, const char *wanted_headers[MAX_WANTED_HEADERS]) { unsigned int i; enum mail_fetch_field fields; /* figure out what data we'd like to cache */ ctx->sort_element_size = sizeof(unsigned int); ctx->cache_mask = 0; for (i = 0; ctx->sort_program[i] != MAIL_SORT_END; i++) { enum mail_sort_type type = ctx->sort_program[i]; if (IS_SORT_STRING(type)) { ctx->sort_element_size += sizeof(const char *); /* cache the second rule as well, if available */ if (ctx->cache_mask != 0) { ctx->cache_mask |= type; break; } ctx->cache_mask |= type; } else if (IS_SORT_TIME(type)) { ctx->sort_element_size += sizeof(time_t); ctx->cache_mask |= type; break; } else if (type == MAIL_SORT_SIZE) { ctx->sort_element_size += sizeof(uoff_t); ctx->cache_mask |= type; break; } } fields = 0; if (ctx->cache_mask & MAIL_SORT_ARRIVAL) fields |= MAIL_FETCH_RECEIVED_DATE; if (ctx->cache_mask & MAIL_SORT_DATE) fields |= MAIL_FETCH_DATE; if (ctx->cache_mask & MAIL_SORT_SIZE) fields |= MAIL_FETCH_SIZE; /* @UNSAFE */ i_assert(MAX_WANTED_HEADERS > 4); i = 0; if (ctx->cache_mask & MAIL_SORT_CC) wanted_headers[i++] = "cc"; if (ctx->cache_mask & MAIL_SORT_FROM) wanted_headers[i++] = "from"; if (ctx->cache_mask & MAIL_SORT_TO) wanted_headers[i++] = "to"; if (ctx->cache_mask & MAIL_SORT_SUBJECT) wanted_headers[i++] = "subject"; wanted_headers[i] = NULL; if ((ctx->cache_mask & MAIL_SORT_CC) || (ctx->cache_mask & MAIL_SORT_FROM) || (ctx->cache_mask & MAIL_SORT_TO) || (ctx->cache_mask & MAIL_SORT_SUBJECT)) { ctx->str_pool = pool_alloconly_create("sort str", 8192); ctx->string_table = hash_create(default_pool, ctx->str_pool, 0, str_hash, (hash_cmp_callback_t)strcmp); } return fields; } static void mail_sort_deinit(struct sort_context *ctx) { mail_sort_flush(ctx); if (ctx->string_table != NULL) hash_destroy(ctx->string_table); if (ctx->str_pool != NULL) pool_unref(ctx->str_pool); buffer_free(ctx->sort_buffer); pool_unref(ctx->temp_pool); i_free(ctx->last_cc); i_free(ctx->last_from); i_free(ctx->last_subject); i_free(ctx->last_to); } int imap_sort(struct client *client, const char *charset, struct mail_search_arg *args, const enum mail_sort_type *sort_program) { enum mail_sort_type norm_prog[MAX_SORT_PROGRAM_SIZE]; enum mail_fetch_field wanted_fields; const char *wanted_headers[MAX_WANTED_HEADERS]; struct sort_context *ctx; struct mail *mail; buffer_t *buf; unsigned int count; int ret; ctx = t_new(struct sort_context, 1); /* normalize sorting program */ buf = buffer_create_data(data_stack_pool, norm_prog, sizeof(norm_prog)); mail_sort_normalize(sort_program, buf); memcpy(ctx->sort_program, norm_prog, sizeof(ctx->sort_program)); /* remove the common part from sort program, we already know input is sorted that much so we don't have to worry about it. */ if (!client->mailbox->search_get_sorting(client->mailbox, norm_prog)) return FALSE; ctx->common_mask = mail_sort_get_common_mask(ctx->sort_program, norm_prog, &count); if (count > 0) { memmove(ctx->sort_program, ctx->sort_program + count, sizeof(ctx->sort_program) - sizeof(ctx->sort_program[0]) * count); } memset(wanted_headers, 0, sizeof(wanted_headers)); wanted_fields = init_sort_elements(ctx, wanted_headers); /* initialize searching */ ctx->search_ctx = client->mailbox-> search_init(client->mailbox, charset, args, norm_prog, wanted_fields, wanted_headers); if (ctx->search_ctx == NULL) return FALSE; ctx->box = client->mailbox; ctx->output = client->output; ctx->temp_pool = pool_alloconly_create("sort temp", 8192); ctx->sort_buffer = buffer_create_dynamic(system_pool, 128 * ctx->sort_element_size, (size_t)-1); ctx->str = t_str_new(STRBUF_SIZE); str_append(ctx->str, "* SORT"); ctx->id_is_uid = client->cmd_uid; while ((mail = client->mailbox->search_next(ctx->search_ctx)) != NULL) mail_sort_input(ctx, mail); mail_sort_flush(ctx); ret = client->mailbox->search_deinit(ctx->search_ctx); if (ctx->written || ret) { str_append(ctx->str, "\r\n"); o_stream_send(client->output, str_data(ctx->str), str_len(ctx->str)); } mail_sort_deinit(ctx); return ret; } static const char *string_table_get(struct sort_context *ctx, const char *str) { char *value; if (str == NULL) return NULL; if (*str == '\0') return ""; value = hash_lookup(ctx->string_table, str); if (value == NULL) { value = p_strdup(ctx->str_pool, str); hash_insert(ctx->string_table, value, value); } return value; } static void mail_sort_check_flush(struct sort_context *ctx, struct mail *mail) { const char *str; time_t t; uoff_t size; int changed = FALSE; if (ctx->common_mask & MAIL_SORT_ARRIVAL) { t = mail->get_received_date(mail); if (t != ctx->last_arrival) { ctx->last_arrival = t; changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_CC) { str = mail->get_first_mailbox(mail, "cc"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); if (null_strcmp(str, ctx->last_cc) != 0) { i_free(ctx->last_cc); ctx->last_cc = i_strdup(str); changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_DATE) { t = mail->get_date(mail, NULL); if (t != ctx->last_date) { ctx->last_date = t; changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_FROM) { str = mail->get_first_mailbox(mail, "from"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); if (null_strcmp(str, ctx->last_from) != 0) { i_free(ctx->last_from); ctx->last_from = i_strdup(str); changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_SIZE) { size = mail->get_size(mail); if (size != ctx->last_size) { ctx->last_size = size; changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_SUBJECT) { str = mail->get_header(mail, "subject"); if (str != NULL) { p_clear(ctx->temp_pool); str = imap_get_base_subject_cased(ctx->temp_pool, str, NULL); } if (null_strcmp(str, ctx->last_subject) != 0) { i_free(ctx->last_subject); ctx->last_subject = i_strdup(str); changed = TRUE; } } if (ctx->common_mask & MAIL_SORT_TO) { str = mail->get_first_mailbox(mail, "to"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); if (null_strcmp(str, ctx->last_to) != 0) { i_free(ctx->last_to); ctx->last_to = i_strdup(str); changed = TRUE; } } if (changed) mail_sort_flush(ctx); } static void mail_sort_input(struct sort_context *ctx, struct mail *mail) { /* @UNSAFE */ unsigned char *buf; unsigned int id; time_t t; uoff_t size; const char *str; size_t pos; t_push(); if (ctx->common_mask != 0) mail_sort_check_flush(ctx, mail); buf = buffer_append_space(ctx->sort_buffer, ctx->sort_element_size); id = ctx->id_is_uid ? mail->uid : mail->seq; memcpy(buf, &id, sizeof(id)); pos = sizeof(id); if (ctx->cache_mask & MAIL_SORT_ARRIVAL) { if (ctx->common_mask & MAIL_SORT_ARRIVAL) t = ctx->last_arrival; else t = mail->get_received_date(mail); memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t); } if (ctx->cache_mask & MAIL_SORT_DATE) { if (ctx->common_mask & MAIL_SORT_DATE) t = ctx->last_date; else t = mail->get_date(mail, NULL); memcpy(buf + pos, &t, sizeof(t)); pos += sizeof(t); } if (ctx->cache_mask & MAIL_SORT_SIZE) { if (ctx->common_mask & MAIL_SORT_SIZE) size = ctx->last_size; else size = mail->get_size(mail); memcpy(buf + pos, &size, sizeof(size)); pos += sizeof(size); } if (ctx->cache_mask & MAIL_SORT_CC) { if (ctx->common_mask & MAIL_SORT_CC) str = ctx->last_cc; else { str = mail->get_first_mailbox(mail, "cc"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); } str = string_table_get(ctx, str); memcpy(buf + pos, &str, sizeof(const char *)); pos += sizeof(const char *); } if (ctx->cache_mask & MAIL_SORT_FROM) { if (ctx->common_mask & MAIL_SORT_FROM) str = ctx->last_from; else { str = mail->get_first_mailbox(mail, "from"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); } str = string_table_get(ctx, str); memcpy(buf + pos, &str, sizeof(const char *)); pos += sizeof(const char *); } if (ctx->cache_mask & MAIL_SORT_TO) { if (ctx->common_mask & MAIL_SORT_TO) str = ctx->last_to; else { str = mail->get_first_mailbox(mail, "to"); if (str != NULL) str = str_ucase(t_strdup_noconst(str)); } str = string_table_get(ctx, str); memcpy(buf + pos, &str, sizeof(const char *)); pos += sizeof(const char *); } if (ctx->cache_mask & MAIL_SORT_SUBJECT) { if (ctx->common_mask & MAIL_SORT_SUBJECT) str = ctx->last_subject; else { str = mail->get_header(mail, "subject"); if (str != NULL) { p_clear(ctx->temp_pool); str = imap_get_base_subject_cased( ctx->temp_pool, str, NULL); } } str = string_table_get(ctx, str); memcpy(buf + pos, &str, sizeof(const char *)); pos += sizeof(const char *); } i_assert(pos == ctx->sort_element_size); t_pop(); } static struct sort_context *qsort_context; static struct mail *get_mail(struct sort_context *ctx, const unsigned char *buf) { unsigned int id = *((unsigned int *) buf); if (ctx->id_is_uid) return ctx->box->fetch_uid(ctx->box, id, 0); else return ctx->box->fetch_seq(ctx->box, id, 0); } static time_t get_time(enum mail_sort_type type, const unsigned char *buf, struct sort_context *ctx) { time_t t; if ((ctx->cache_mask & type) == 0) { struct mail *mail = get_mail(ctx, buf); if (mail == NULL) return 0; switch (type) { case MAIL_SORT_ARRIVAL: return mail->get_received_date(mail); case MAIL_SORT_DATE: t = mail->get_date(mail, NULL); if (t == (time_t)-1) t = 0; return t; default: i_unreached(); return 0; } } /* use memcpy() to avoid any alignment problems */ memcpy(&t, buf + sizeof(unsigned int), sizeof(t)); return t; } static time_t get_uofft(enum mail_sort_type type, const unsigned char *buf, struct sort_context *ctx) { uoff_t size; if ((ctx->cache_mask & type) == 0) { struct mail *mail = get_mail(ctx, buf); if (mail == NULL) return 0; i_assert(type == MAIL_SORT_SIZE); return mail->get_size(mail); } /* use memcpy() to avoid any alignment problems */ memcpy(&size, buf + sizeof(unsigned int), sizeof(size)); return size; } static const char *get_str(enum mail_sort_type type, const unsigned char *buf, struct sort_context *ctx) { const char *str; enum mail_sort_type type2; int pos; if ((ctx->cache_mask & type) == 0) { struct mail *mail = get_mail(ctx, buf); if (mail == NULL) return NULL; switch (type) { case MAIL_SORT_SUBJECT: str = mail->get_header(mail, "subject"); if (str == NULL) return NULL; p_clear(ctx->temp_pool); return imap_get_base_subject_cased(ctx->temp_pool, str, NULL); case MAIL_SORT_CC: str = mail->get_first_mailbox(mail, "cc"); break; case MAIL_SORT_FROM: str = mail->get_first_mailbox(mail, "from"); break; case MAIL_SORT_TO: str = mail->get_first_mailbox(mail, "to"); break; default: i_unreached(); } if (str != NULL) str = str_ucase(t_strdup_noconst(str)); return str; } /* figure out where it is. pretty ugly. */ type2 = (ctx->cache_mask & ~type); if (type2 == 0) pos = 0; else if (IS_SORT_TIME(type2)) pos = sizeof(time_t); else if (type2 == MAIL_SORT_SIZE) pos = sizeof(uoff_t); else { if (type == MAIL_SORT_SUBJECT) pos = sizeof(const char *); else if (type2 != MAIL_SORT_SUBJECT && type > type2) pos = sizeof(const char *); else pos = 0; } /* use memcpy() to avoid any alignment problems */ memcpy(&str, buf + pos + sizeof(unsigned int), sizeof(const char *)); return str; } static int mail_sort_qsort_func(const void *p1, const void *p2) { enum mail_sort_type *sorting; int ret, reverse = FALSE; sorting = qsort_context->sort_program; t_push(); ret = 0; for (; *sorting != MAIL_SORT_END && ret == 0; sorting++) { if (*sorting == MAIL_SORT_REVERSE) { reverse = !reverse; continue; } switch (*sorting) { case MAIL_SORT_ARRIVAL: case MAIL_SORT_DATE: { time_t r1, r2; r1 = get_time(*sorting, p1, qsort_context); r2 = get_time(*sorting, p2, qsort_context); ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0; break; } case MAIL_SORT_SIZE: { uoff_t r1, r2; r1 = get_uofft(*sorting, p1, qsort_context); r2 = get_uofft(*sorting, p2, qsort_context); ret = r1 < r2 ? -1 : r1 > r2 ? 1 : 0; break; } case MAIL_SORT_CC: case MAIL_SORT_FROM: case MAIL_SORT_TO: case MAIL_SORT_SUBJECT: ret = null_strcmp(get_str(*sorting, p1, qsort_context), get_str(*sorting, p2, qsort_context)); break; default: i_unreached(); } if (reverse) { if (ret > 0) ret = -1; else if (ret < 0) ret = 1; } reverse = FALSE; } t_pop(); return ret != 0 ? ret : (*((unsigned int *) p1) < *((unsigned int *) p2) ? -1 : 1); } static void mail_sort_flush(struct sort_context *ctx) { unsigned char *arr; size_t i, count; qsort_context = ctx; arr = buffer_get_modifyable_data(ctx->sort_buffer, NULL); count = buffer_get_used_size(ctx->sort_buffer) / ctx->sort_element_size; if (count == 0) return; qsort(arr, count, ctx->sort_element_size, mail_sort_qsort_func); for (i = 0; i < count; i++, arr += ctx->sort_element_size) { if (str_len(ctx->str) >= STRBUF_SIZE-MAX_INT_STRLEN) { /* flush */ o_stream_send(ctx->output, str_data(ctx->str), str_len(ctx->str)); str_truncate(ctx->str, 0); ctx->written = TRUE; } str_printfa(ctx->str, " %u", *((unsigned int *) arr)); } buffer_set_used_size(ctx->sort_buffer, 0); if (ctx->string_table != NULL) { hash_clear(ctx->string_table, TRUE); p_clear(ctx->str_pool); } }