Mercurial > dovecot > original-hg > dovecot-1.2
changeset 4849:7f2fee02b07c HEAD
Resize the hash when needed. Also other fixes and cleanups.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 26 Nov 2006 17:20:11 +0200 |
parents | 967de900c73a |
children | 0c540b6a0551 |
files | src/imap/imap-thread.c src/lib-index/mail-hash.c src/lib-index/mail-hash.h |
diffstat | 3 files changed, 245 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/src/imap/imap-thread.c Sun Nov 26 00:17:39 2006 +0200 +++ b/src/imap/imap-thread.c Sun Nov 26 17:20:11 2006 +0200 @@ -202,6 +202,12 @@ pool_unref(ctx->msgid_pool); } +static uint32_t crc32_str_nonzero(const char *str) +{ + uint32_t value = crc32_str(str); + return value == 0 ? 1 : value; +} + static int mail_thread_rec_idx(struct thread_context *ctx, uint32_t idx, const struct mail_thread_rec **rec_r) @@ -217,7 +223,7 @@ return 0; } -static unsigned int mail_thread_rec_hash(const void *key) +static unsigned int mail_thread_hash_key(const void *key) { const struct msgid_rec *key_rec = key; @@ -241,7 +247,7 @@ msgids = mail_get_first_header(ctx->tmp_mail, HDR_IN_REPLY_TO); msgid = msgids == NULL ? NULL : message_id_get_next(&msgids); if (msgid != NULL) { - if (crc32_str(msgid) == msgid_crc32) + if (crc32_str_nonzero(msgid) == msgid_crc32) found_msgid = msgid; } @@ -252,7 +258,7 @@ } while ((msgid = message_id_get_next(&msgids)) != NULL) { - if (crc32_str(msgid) == msgid_crc32) { + if (crc32_str_nonzero(msgid) == msgid_crc32) { if (found_msgid != NULL) { /* hash collisions, we can't figure this out */ return -1; @@ -284,6 +290,12 @@ /* each non-dummy child must have a valid In-Reply-To or References header pointing to the parent, otherwise it wouldn't be our child */ + if (parent_rec->msgid_crc32 == 0) { + mail_hash_set_corrupted(ctx->msgid_hash, + "msgid_crc32=0 node has children"); + return NULL; + } + if (mail_thread_find_child_msgid(ctx, child_rec->rec.uid, parent_rec->msgid_crc32, &msgid) == 0) @@ -324,8 +336,8 @@ return *p; } -static bool mail_thread_rec_msgid_cmp(const void *key, const void *data, - void *context) +static bool mail_thread_hash_cmp(const void *key, const void *data, + void *context) { struct imap_thread_mailbox *tbox = context; struct thread_context *ctx = tbox->ctx; @@ -350,6 +362,13 @@ return strcmp(msgid, key_rec->msgid) == 0; } +static unsigned int mail_thread_hash_rec(const void *p) +{ + const struct mail_thread_rec *rec = p; + + return rec->msgid_crc32; +} + static int imap_thread_context_init(struct imap_thread_mailbox *tbox, struct client *client, const char *charset, @@ -364,6 +383,15 @@ STATUS_MESSAGES | STATUS_UIDNEXT, &status) < 0) return -1; + if (search_args->type != SEARCH_ALL || + search_args->next != NULL) { + /* each search condition requires their own separate thread + index. for now we don't bother to support anything else than + "search all" threading, since that's what pretty much all + the clients do anyway. */ + ctx->msgid_hash = NULL; + } + if (ctx->msgid_hash != NULL) { if (mail_hash_lock(ctx->msgid_hash) <= 0) ctx->msgid_hash = NULL; @@ -380,36 +408,34 @@ ctx->msgid_hash = NULL; } else { /* rebuild */ - if (mail_hash_reset(ctx->msgid_hash) < 0) { - mail_hash_unlock(ctx->msgid_hash); + if (mail_hash_reset(ctx->msgid_hash, 0) < 0) ctx->msgid_hash = NULL; - } + } + } else if (hdr->last_uid != 0) { + /* non-empty hash. add only the new messages in there. */ + if (mailbox_get_uids(client->mailbox, 1, hdr->last_uid, + &ctx->seqset.seq1, + &ctx->seqset.seq2) < 0) { + mail_hash_unlock(ctx->msgid_hash); + return -1; } - } else if (search_args->next == NULL && - search_args->type == SEARCH_ALL) { - /* threading everything. this is the only case we're trying - to optimize. */ - if (hdr->last_uid != 0) { - /* messages already exist in the hash. add only the - new messages in there. */ - if (mailbox_get_uids(client->mailbox, 1, hdr->last_uid, - &ctx->seqset.seq1, - &ctx->seqset.seq2) < 0) - return -1; - if (ctx->seqset.seq2 != hdr->message_count) { - /* some messages have been expunged. - have to rebuild. */ - if (mail_hash_reset(ctx->msgid_hash) < 0) { - mail_hash_unlock(ctx->msgid_hash); - ctx->msgid_hash = NULL; - } - } else { - ctx->tmp_search_arg.type = SEARCH_SEQSET; - ctx->tmp_search_arg.value.seqset = &ctx->seqset; - ctx->tmp_search_arg.not = TRUE; - search_args = &ctx->tmp_search_arg; - } + if (ctx->seqset.seq2 != hdr->message_count) { + /* some messages have been expunged. have to rebuild. */ + if (mail_hash_reset(ctx->msgid_hash, 0) < 0) + ctx->msgid_hash = NULL; + } else { + /* after all these checks, this is the only case we + can actually optimize. */ + ctx->tmp_search_arg.type = SEARCH_SEQSET; + ctx->tmp_search_arg.value.seqset = &ctx->seqset; + ctx->tmp_search_arg.not = TRUE; + search_args = &ctx->tmp_search_arg; + + if (mail_hash_resize_if_needed(ctx->msgid_hash, + status.messages - + hdr->message_count) < 0) + ctx->msgid_hash = NULL; } } @@ -422,9 +448,10 @@ mail_hash_open(ibox->index, ".thread", MAIL_HASH_OPEN_FLAG_CREATE | MAIL_HASH_OPEN_FLAG_IN_MEMORY, - sizeof(struct mail_thread_rec), - mail_thread_rec_hash, - mail_thread_rec_msgid_cmp, + sizeof(struct mail_thread_rec), 0, + mail_thread_hash_key, + mail_thread_hash_rec, + mail_thread_hash_cmp, tbox); } @@ -436,6 +463,8 @@ ctx->box = client->mailbox; ctx->output = client->output; + /* at this point the hash is either locked or we're using in-memory + hash where it doesn't matter */ hdr = mail_hash_get_header(ctx->msgid_hash); count = client->messages_count < hdr->record_count ? 0 : client->messages_count - hdr->record_count; @@ -571,7 +600,7 @@ mail_thread_rec_get_msgid(ctx, rec, idx); if (key.msgid == NULL) return -1; - key.msgid_crc32 = crc32_str(key.msgid); + key.msgid_crc32 = crc32_str_nonzero(key.msgid); if (mail_hash_remove_idx(ctx->msgid_hash, idx, &key) < 0) { ctx->failed = TRUE; @@ -782,7 +811,7 @@ memset(&rec, 0, sizeof(rec)); rec.rec.uid = uid; - rec.msgid_crc32 = msgid == NULL ? 0 : crc32_str(msgid); + rec.msgid_crc32 = msgid == NULL ? 0 : crc32_str_nonzero(msgid); rec.refcount = 1; rec.sent_date = sent_date; @@ -842,6 +871,7 @@ uint32_t orig_idx = idx; orig_rec = *recp; + rec.msgid_crc32 = 0; if (mail_hash_insert(ctx->msgid_hash, NULL, &rec, &idx) < 0) { ctx->failed = TRUE; return -1; @@ -975,7 +1005,7 @@ int ret; key.msgid = msgid; - key.msgid_crc32 = crc32_str(msgid); + key.msgid_crc32 = crc32_str_nonzero(msgid); ret = mail_hash_lookup(ctx->msgid_hash, &key, &value, idx_r); if (ret < 0 || ctx->failed) { @@ -1816,7 +1846,7 @@ key_r->msgid = message_id_get_next(&message_id); if (key_r->msgid == NULL) return 0; - key_r->msgid_crc32 = crc32_str(key_r->msgid); + key_r->msgid_crc32 = crc32_str_nonzero(key_r->msgid); if (mail_hash_lookup(ctx->msgid_hash, key_r, &value, idx_r) <= 0) return -1; @@ -1851,7 +1881,7 @@ references = t_strdup(references); do { key.msgid = msgid; - key.msgid_crc32 = crc32_str(msgid); + key.msgid_crc32 = crc32_str_nonzero(msgid); ctx->cmp_match_count = 0; ctx->cmp_last_idx = 0; @@ -2005,9 +2035,11 @@ tbox->msgid_hash = mail_hash_open(ibox->index, ".thread", create ? MAIL_HASH_OPEN_FLAG_CREATE : 0, - sizeof(struct mail_thread_rec), - mail_thread_rec_hash, - mail_thread_rec_msgid_cmp, tbox); + sizeof(struct mail_thread_rec), 0, + mail_thread_hash_key, + mail_thread_hash_rec, + mail_thread_hash_cmp, + tbox); if (tbox->msgid_hash == NULL) return;
--- a/src/lib-index/mail-hash.c Sun Nov 26 00:17:39 2006 +0200 +++ b/src/lib-index/mail-hash.c Sun Nov 26 17:20:11 2006 +0200 @@ -1,8 +1,5 @@ /* Copyright (C) 2006 Timo Sirainen */ -/* FIXME: we need to be able to rebuild the hash table when it gets - too small or large */ - #include "lib.h" #include "ioloop.h" #include "array.h" @@ -16,6 +13,7 @@ #include "mail-index-private.h" #include "mail-hash.h" +#include <stdio.h> #include <stddef.h> #include <utime.h> #include <sys/stat.h> @@ -24,17 +22,21 @@ #define FILE_SIZE_INIT_PERCENTAGE 120 /* How much larger to grow the file when it needs to be done */ #define MAIL_HASH_GROW_PERCENTAGE 20 +/* Minimum hash size to use */ +#define MAIL_HASH_MIN_SIZE 109 #define MAIL_HASH_TIMEOUT_SECS 3 struct mail_hash { struct mail_index *index; - hash_callback_t *hash_cb; + hash_callback_t *key_hash_cb; hash_ctx_cmp_callback_t *key_compare_cb; + hash_callback_t *rec_hash_cb; void *cb_context; char *filepath; + char *suffix; int fd; dev_t dev; @@ -99,7 +101,7 @@ size_t offset = offsetof(struct mail_hash_header, corrupted); struct stat st; - if (MAIL_HASH_IS_IN_MEMORY(hash)) + if (hash->fd == -1) return 0; hash->hdr->corrupted = set ? 1 : 0; @@ -212,6 +214,8 @@ hash->hdr = NULL; hash->hash_base = NULL; hash->records_base = NULL; + + hash->locked = FALSE; } static int mail_hash_file_map_finish(struct mail_hash *hash) @@ -410,7 +414,7 @@ } } -static int mail_hash_file_open(struct mail_hash *hash) +static int mail_hash_file_open(struct mail_hash *hash, bool lock) { int ret; @@ -422,21 +426,27 @@ return -1; } - if (mail_hash_file_lock(hash, F_RDLCK) <= 0) - return -1; + if (!lock) { + if (mail_hash_file_lock(hash, F_RDLCK) <= 0) + return -1; - ret = mail_hash_file_map(hash, FALSE); - if (hash->fd != -1) - (void)mail_hash_file_lock(hash, F_UNLCK); + ret = mail_hash_file_map(hash, FALSE); + if (hash->fd != -1) + (void)mail_hash_file_lock(hash, F_UNLCK); + } else { + if (mail_hash_file_lock(hash, F_WRLCK) <= 0) + return -1; + + hash->locked = TRUE; + ret = mail_hash_file_map(hash, TRUE); + } return ret; } static void -mail_hash_header_init(struct mail_hash *hash, struct mail_hash_header *hdr, - uoff_t *file_size_r) +mail_hash_header_init(struct mail_hash *hash, unsigned int initial_count, + struct mail_hash_header *hdr, uoff_t *file_size_r) { - unsigned int message_count; - memset(hdr, 0, sizeof(*hdr)); hdr->version = MAIL_HASH_VERSION; hdr->base_header_size = sizeof(*hdr); @@ -446,16 +456,19 @@ uid_validity may be 0 */ hdr->uid_validity = hash->index->hdr->uid_validity; - message_count = I_MAX(hash->index->hdr->messages_count, 25); - hdr->hash_size = primes_closest(I_MAX(message_count * 2, 109)); + if (initial_count == 0) + initial_count = I_MAX(hash->index->hdr->messages_count, 25); + hdr->hash_size = I_MAX(primes_closest(initial_count * 2), + MAIL_HASH_MIN_SIZE); *file_size_r = hdr->header_size + hdr->hash_size * sizeof(uint32_t) + - hash->record_size * (message_count * + hash->record_size * (initial_count * FILE_SIZE_INIT_PERCENTAGE / 100); } -static int mail_hash_file_create(struct mail_hash *hash) +static int +mail_hash_file_create(struct mail_hash *hash, unsigned int initial_count) { struct dotlock *dotlock; struct mail_hash_header hdr; @@ -468,7 +481,7 @@ return -1; } - mail_hash_header_init(hash, &hdr, &file_size); + mail_hash_header_init(hash, initial_count, &hdr, &file_size); if (write_full(fd, &hdr, sizeof(hdr)) < 0 || file_set_size(fd, file_size) < 0) { mail_hash_set_syscall_error(hash, "write()"); @@ -483,12 +496,13 @@ return 0; } -static void mail_hash_create_in_memory(struct mail_hash *hash) +static void mail_hash_create_in_memory(struct mail_hash *hash, + unsigned int initial_count) { struct mail_hash_header hdr; uoff_t file_size; - mail_hash_header_init(hash, &hdr, &file_size); + mail_hash_header_init(hash, initial_count, &hdr, &file_size); hash->mmap_size = file_size; hash->mmap_base = mmap_anon(hash->mmap_size); @@ -506,8 +520,10 @@ struct mail_hash * mail_hash_open(struct mail_index *index, const char *suffix, - enum mail_hash_open_flags flags, - unsigned int record_size, hash_callback_t *hash_cb, + enum mail_hash_open_flags flags, unsigned int record_size, + unsigned int initial_count, + hash_callback_t *key_hash_cb, + hash_callback_t *rec_hash_cb, hash_ctx_cmp_callback_t *key_compare_cb, void *context) { @@ -521,16 +537,18 @@ hash->filepath = (flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ? i_strdup("(in-memory hash)") : i_strconcat(index->filepath, suffix, NULL); + hash->suffix = i_strdup(suffix); hash->record_size = record_size; hash->fd = -1; - hash->hash_cb = hash_cb; + hash->key_hash_cb = key_hash_cb; + hash->rec_hash_cb = rec_hash_cb; hash->key_compare_cb = key_compare_cb; hash->cb_context = context; ret = MAIL_INDEX_IS_IN_MEMORY(hash->index) || (flags & MAIL_HASH_OPEN_FLAG_IN_MEMORY) != 0 ? -1 : - mail_hash_file_open(hash); + mail_hash_file_open(hash, FALSE); if (ret <= 0 && (flags & MAIL_HASH_OPEN_FLAG_CREATE) == 0) { /* we don't want to create the hash */ @@ -539,12 +557,12 @@ } if (ret == 0) { /* not found or broken, recreate it */ - ret = mail_hash_reset(hash); + ret = mail_hash_reset(hash, initial_count); } if (ret < 0) { /* fallback to in-memory hash */ mail_hash_file_close(hash); - mail_hash_create_in_memory(hash); + mail_hash_create_in_memory(hash, initial_count); } return hash; } @@ -557,28 +575,26 @@ mail_hash_file_close(hash); i_free(hash->filepath); + i_free(hash->suffix); i_free(hash); } -int mail_hash_reset(struct mail_hash *hash) +int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count) { + bool locked = hash->locked; int ret; mail_hash_file_close(hash); - ret = mail_hash_file_create(hash); + ret = mail_hash_file_create(hash, initial_count); if (ret == 0) { /* should work now, try opening again */ - ret = mail_hash_file_open(hash); + ret = mail_hash_file_open(hash, locked); if (ret == 0) { mail_hash_set_corrupted(hash, "Newly created hash file broken"); return -1; } - if (ret > 0 && hash->locked) { - if (mail_hash_file_lock(hash, F_WRLCK) <= 0) - return -1; - } } return ret < 0 ? -1 : 0; } @@ -589,13 +605,13 @@ mail_hash_file_close(hash); - if ((ret = mail_hash_file_open(hash)) < 0) + if ((ret = mail_hash_file_open(hash, FALSE)) < 0) return -1; if (ret > 0) return 0; /* not found or broken, recreate it */ - return mail_hash_reset(hash); + return mail_hash_reset(hash, 0); } static int mail_hash_reopen_if_needed(struct mail_hash *hash) @@ -671,7 +687,7 @@ unsigned int hash_idx; uint32_t idx; - hash_idx = hash->hash_cb(key) % hash->hdr->hash_size; + hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size; for (idx = hash->hash_base[hash_idx]; idx != 0; ) { if (idx > hash->hdr->record_count) { mail_hash_set_corrupted(hash, @@ -773,8 +789,8 @@ return 0; } -int mail_hash_insert(struct mail_hash *hash, const void *key, - const void *value, uint32_t *idx_r) +static int mail_hash_insert_with_hash(struct mail_hash *hash, const void *value, + uint32_t hash_key, uint32_t *idx_r) { struct mail_hash_record *rec; uint32_t idx, *idx_p; @@ -807,13 +823,11 @@ } memcpy(rec, value, hash->record_size); - if (mail_hash_update_header(hash, rec, FALSE) < 0) return -1; - if (key != NULL) { - idx_p = &hash->hash_base[hash->hash_cb(key) % - hash->hdr->hash_size]; + if (hash_key != 0) { + idx_p = &hash->hash_base[hash_key % hash->hdr->hash_size]; while (*idx_p != 0) { rec = HASH_RECORD_IDX(hash, *idx_p); idx_p = &rec->next_idx; @@ -822,11 +836,26 @@ if (mail_hash_mark_update(hash, idx_p, sizeof(*idx_p)) < 0) return -1; *idx_p = idx; + + hash->hdr->hashed_count++; } + *idx_r = idx; return 0; } +int mail_hash_insert(struct mail_hash *hash, const void *key, + const void *value, uint32_t *idx_r) +{ + uint32_t hash_key = key == NULL ? 0 : hash->key_hash_cb(key); + + i_assert((key == NULL && hash->rec_hash_cb(value) == 0) || + (key != NULL && hash_key != 0 && + hash->rec_hash_cb(value) != 0)); + + return mail_hash_insert_with_hash(hash, value, hash_key, idx_r); +} + int mail_hash_remove(struct mail_hash *hash, const void *key) { return mail_hash_remove_idx(hash, 0, key); @@ -836,12 +865,12 @@ { struct mail_hash_record *rec = NULL; unsigned int hash_idx; - uint32_t *idx_p; + uint32_t hash_key, *idx_p; i_assert(idx != 0 || key != NULL); if (key != NULL) { - hash_idx = hash->hash_cb(key) % hash->hdr->hash_size; + hash_idx = hash->key_hash_cb(key) % hash->hdr->hash_size; for (idx_p = &hash->hash_base[hash_idx]; *idx_p != 0; ) { if (*idx_p > hash->hdr->record_count) { mail_hash_set_corrupted(hash, @@ -881,6 +910,15 @@ hash->hdr->message_count--; } + hash_key = hash->rec_hash_cb(rec); + if (hash_key != 0) { + if (hash->hdr->hashed_count == 0) { + mail_hash_set_corrupted(hash, "Too low hashed_count"); + return -1; + } + hash->hdr->hashed_count--; + } + if (idx == hash->hdr->record_count) { hash->hdr->record_count--; } else { @@ -946,3 +984,77 @@ return mail_hash_update_header(hash, rec, had_uid); } + +int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count) +{ + struct mail_hash *tmp_hash; + const struct mail_hash_record *rec; + const char *tmp_filename; + uint32_t hash_key, idx, new_idx; + float nodes_per_list; + int ret = 0; + + if (MAIL_HASH_IS_IN_MEMORY(hash)) + return 0; + + i_assert(hash->locked); + + nodes_per_list = (float)(hash->hdr->hashed_count + grow_count) / + (float)hash->hdr->hash_size; + if ((nodes_per_list > 0.3 && nodes_per_list < 2.0) || + hash->hdr->hash_size <= MAIL_HASH_MIN_SIZE) + return 0; + + /* create a temporary hash */ + tmp_hash = mail_hash_open(hash->index, + t_strconcat(hash->suffix, ".tmp", NULL), + MAIL_HASH_OPEN_FLAG_CREATE, + hash->record_size, + hash->hdr->hashed_count + grow_count, + hash->key_hash_cb, + hash->rec_hash_cb, + hash->key_compare_cb, + hash->cb_context); + if (tmp_hash == NULL) + return -1; + + /* populate */ + for (idx = 1; idx <= hash->hdr->record_count; idx++) { + rec = HASH_RECORD_IDX(hash, idx); + hash_key = hash->rec_hash_cb(rec); + + if (mail_hash_insert_with_hash(tmp_hash, rec, hash_key, + &new_idx) < 0) { + ret = -1; + break; + } + i_assert(idx == new_idx); + } + (void)mail_hash_file_write_changes(tmp_hash); + + tmp_filename = t_strdup(tmp_hash->filepath); + mail_hash_free(&tmp_hash); + if (ret < 0) { + (void)unlink(tmp_filename); + return -1; + } + + /* replace the old */ + if (rename(tmp_filename, hash->filepath) < 0) { + mail_hash_set_syscall_error(hash, "rename()"); + (void)unlink(tmp_filename); + return -1; + } + + /* reopen the hash */ + mail_hash_file_close(hash); + if ((ret = mail_hash_file_open(hash, TRUE)) < 0) + return -1; + if (ret == 0) { + mail_hash_set_corrupted(hash, + "Newly created hash file broken"); + return -1; + } + + return 0; +}
--- a/src/lib-index/mail-hash.h Sun Nov 26 00:17:39 2006 +0200 +++ b/src/lib-index/mail-hash.h Sun Nov 26 17:20:11 2006 +0200 @@ -30,6 +30,8 @@ uint32_t record_count; /* Number of message records (records with non-zero UID) */ uint32_t message_count; + /* Number of messages with non-zero hash */ + uint32_t hashed_count; /* UID validity. */ uint32_t uid_validity; @@ -57,21 +59,24 @@ MAIL_HASH_OPEN_FLAG_IN_MEMORY = 0x02 }; -/* Returns hash code. */ -typedef unsigned int hash_ctx_callback_t(const void *p, void *context); /* Returns 0 if the pointers are equal. */ typedef bool hash_ctx_cmp_callback_t(const void *key, const void *data, void *context); struct mail_hash * mail_hash_open(struct mail_index *index, const char *suffix, - enum mail_hash_open_flags flags, - unsigned int record_size, hash_callback_t *hash_cb, + enum mail_hash_open_flags flags, unsigned int record_size, + unsigned int initial_count, + hash_callback_t *key_hash_cb, + hash_callback_t *rec_hash_cb, hash_ctx_cmp_callback_t *key_compare_cb, void *context); void mail_hash_free(struct mail_hash **hash); -int mail_hash_reset(struct mail_hash *hash); +/* If reset or resize fails, the hash file is closed and the hash is in + unusable state until mail_hash_lock() succeeds. */ +int mail_hash_reset(struct mail_hash *hash, unsigned int initial_count); +int mail_hash_resize_if_needed(struct mail_hash *hash, unsigned int grow_count); /* Lock hash file. Returns 1 if we locked the file, 0 if timeouted or hash is in memory, -1 if error. */ @@ -83,7 +88,8 @@ int mail_hash_lookup(struct mail_hash *hash, const void *key, const void **value_r, uint32_t *idx_r); /* Remember that inserting may cause existing returned values to be - invalidated */ + invalidated. If key=NULL, it's not inserted into hash table. Note that + hash=0 equals to key=NULL insert, so a valid hash value must never be 0. */ int mail_hash_insert(struct mail_hash *hash, const void *key, const void *value, uint32_t *idx_r); int mail_hash_remove(struct mail_hash *hash, const void *key);