Mercurial > dovecot > original-hg > dovecot-1.2
changeset 4878:88a91d9a867b HEAD
Fixes. Should be pretty much working now.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Wed, 06 Dec 2006 17:45:46 +0200 |
parents | 63d3b76e44fe |
children | b0ada6e57b07 |
files | src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts-squat/squat-test.c src/plugins/fts-squat/squat-trie.c src/plugins/fts-squat/squat-trie.h src/plugins/fts-squat/squat-uidlist.c src/plugins/fts-squat/squat-uidlist.h |
diffstat | 6 files changed, 460 insertions(+), 191 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-squat/fts-backend-squat.c Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/fts-backend-squat.c Wed Dec 06 17:45:46 2006 +0200 @@ -2,7 +2,7 @@ #include "lib.h" #include "array.h" -#include "mail-storage.h" +#include "mail-storage-private.h" #include "mail-search.h" #include "squat-trie.h" #include "fts-squat-plugin.h" @@ -12,8 +12,11 @@ struct squat_fts_backend { struct fts_backend backend; struct squat_trie *trie; +}; - uint32_t last_uid; +struct squat_fts_backend_build_context { + struct fts_backend_build_context ctx; + struct squat_trie_build_context *trie_ctx; }; static struct fts_backend *fts_backend_squat_init(struct mailbox *box) @@ -33,7 +36,8 @@ backend = i_new(struct squat_fts_backend, 1); backend->backend = fts_backend_squat; backend->trie = - squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL)); + squat_trie_open(t_strconcat(path, "/"SQUAT_FILE_PREFIX, NULL), + storage->lock_method); return &backend->backend; } @@ -52,8 +56,7 @@ struct squat_fts_backend *backend = (struct squat_fts_backend *)_backend; - *last_uid_r = backend->last_uid; - return 0; + return squat_trie_get_last_uid(backend->trie, last_uid_r); } static struct fts_backend_build_context * @@ -61,38 +64,35 @@ { struct squat_fts_backend *backend = (struct squat_fts_backend *)_backend; - struct fts_backend_build_context *ctx; - - *last_uid_r = backend->last_uid; + struct squat_fts_backend_build_context *ctx; - ctx = i_new(struct fts_backend_build_context, 1); - ctx->backend = _backend; - return ctx; + ctx = i_new(struct squat_fts_backend_build_context, 1); + ctx->ctx.backend = _backend; + ctx->trie_ctx = squat_trie_build_init(backend->trie, last_uid_r); + return &ctx->ctx; } static int -fts_backend_squat_build_more(struct fts_backend_build_context *ctx, +fts_backend_squat_build_more(struct fts_backend_build_context *_ctx, uint32_t uid, const unsigned char *data, size_t size) { - struct squat_fts_backend *backend = - (struct squat_fts_backend *)ctx->backend; + struct squat_fts_backend_build_context *ctx = + (struct squat_fts_backend_build_context *)_ctx; - i_assert(uid >= backend->last_uid); - backend->last_uid = uid; - - return squat_trie_add(backend->trie, uid, data, size); + return squat_trie_build_more(ctx->trie_ctx, uid, data, size); } static int -fts_backend_squat_build_deinit(struct fts_backend_build_context *ctx) +fts_backend_squat_build_deinit(struct fts_backend_build_context *_ctx) { - struct squat_fts_backend *backend = - (struct squat_fts_backend *)ctx->backend; + struct squat_fts_backend_build_context *ctx = + (struct squat_fts_backend_build_context *)_ctx; + int ret; - squat_trie_flush(backend->trie); + ret = squat_trie_build_deinit(ctx->trie_ctx); i_free(ctx); - return 0; + return ret; } static void @@ -152,6 +152,22 @@ t_pop(); } +static int fts_backend_squat_lock(struct fts_backend *_backend) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + + return squat_trie_lock(backend->trie, F_RDLCK); +} + +static void fts_backend_squat_unlock(struct fts_backend *_backend) +{ + struct squat_fts_backend *backend = + (struct squat_fts_backend *)_backend; + + squat_trie_unlock(backend->trie); +} + static int fts_backend_squat_lookup(struct fts_backend *_backend, const char *key, ARRAY_TYPE(seq_range) *result) @@ -185,6 +201,8 @@ fts_backend_squat_build_deinit, fts_backend_squat_expunge, fts_backend_squat_expunge_finish, + fts_backend_squat_lock, + fts_backend_squat_unlock, fts_backend_squat_lookup, fts_backend_squat_filter }
--- a/src/plugins/fts-squat/squat-test.c Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/squat-test.c Wed Dec 06 17:45:46 2006 +0200 @@ -2,6 +2,7 @@ #include "lib.h" #include "array.h" +#include "file-lock.h" #include "istream.h" #include "squat-trie.h" #include "squat-uidlist.h" @@ -31,12 +32,14 @@ int main(int argc __attr_unused__, char *argv[]) { struct squat_trie *trie; + struct squat_trie_build_context *build_ctx; struct istream *input; ARRAY_TYPE(seq_range) result; char *line, *str, buf[4096]; int fd; ssize_t ret; unsigned int last = 0, seq = 0, leaves, uid_lists_mem, uid_lists_count; + uint32_t last_uid; size_t mem; clock_t clock_start, clock_end; struct timeval tv_start, tv_end; @@ -45,7 +48,8 @@ lib_init(); (void)unlink("/tmp/squat-test-index.search"); (void)unlink("/tmp/squat-test-index.search.uids"); - trie = squat_trie_open("/tmp/squat-test-index.search"); + trie = squat_trie_open("/tmp/squat-test-index.search", + FILE_LOCK_METHOD_FCNTL); clock_start = clock(); gettimeofday(&tv_start, NULL); @@ -54,6 +58,7 @@ if (fd == -1) return 1; + build_ctx = squat_trie_build_init(trie, &last_uid); input = i_stream_create_file(fd, default_pool, 0, FALSE); while ((line = i_stream_read_next_line(input)) != NULL) { if (last != input->v_offset/(1024*100)) { @@ -66,10 +71,11 @@ continue; } - if (squat_trie_add(trie, seq, line, strlen(line)) < 0) + if (squat_trie_build_more(build_ctx, seq, + line, strlen(line)) < 0) break; } - squat_trie_flush(trie); + squat_trie_build_deinit(build_ctx); clock_end = clock(); gettimeofday(&tv_end, NULL);
--- a/src/plugins/fts-squat/squat-trie.c Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/squat-trie.c Wed Dec 06 17:45:46 2006 +0200 @@ -3,6 +3,7 @@ #include "lib.h" #include "array.h" #include "bsearch-insert-pos.h" +#include "file-lock.h" #include "istream.h" #include "ostream.h" #include "mmap-util.h" @@ -15,14 +16,17 @@ #include <fcntl.h> #include <ctype.h> +#define TRIE_COMPRESS_PERCENTAGE 30 +#define TRIE_COMPRESS_MIN_SIZE (1024*50) + +#define SQUAT_TRIE_VERSION 1 +#define SQUAT_TRIE_LOCK_TIMEOUT 60 + /* for non-x86 use memcpy() when accessing unaligned int* addresses */ #if defined(__i386__) || defined(__x86_64__) # define ALLOW_UNALIGNED_ACCESS #endif -#define TRIE_COMPRESS_PERCENTAGE 30 -#define TRIE_COMPRESS_MIN_SIZE (1024*50) - #define BLOCK_SIZE 4 #define ALIGN(size) \ @@ -34,6 +38,11 @@ dev_t dev; ino_t ino; + enum file_lock_method lock_method; + struct file_lock *file_lock; + int lock_count; + int lock_type; /* F_RDLCK / F_WRLCK */ + void *mmap_base; size_t mmap_size; @@ -42,22 +51,42 @@ struct squat_uidlist *uidlist; struct trie_node *root; buffer_t *buf; - unsigned int deleted_space; + + unsigned int corrupted:1; +}; + +struct squat_trie_build_context { + struct squat_trie *trie; + struct ostream *output; - struct squat_uidlist_compress_ctx *compress_ctx; uint32_t prev_uid; unsigned int prev_added_size; uint16_t prev_added[BLOCK_SIZE-1]; unsigned int node_count; + unsigned int deleted_space; - unsigned int corrupted:1; + unsigned int failed:1; + unsigned int locked:1; +}; + +struct squat_trie_compress_context { + struct squat_trie *trie; + + const char *tmp_path; + struct ostream *output; + + struct squat_uidlist_compress_ctx *uidlist_ctx; + + unsigned int node_count; }; struct squat_trie_header { + uint8_t version; + uint8_t unused[3]; + uint32_t uidvalidity; - uint32_t header_size; uint32_t used_file_size; uint32_t deleted_space; uint32_t node_count; @@ -113,10 +142,11 @@ ALIGN(sizeof(uint16_t) * ((node)->chars_16bit_count))) static int -squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node, - unsigned int level); -static int trie_write_node(struct squat_trie *trie, unsigned int level, - struct trie_node *node); +squat_trie_compress_node(struct squat_trie_compress_context *ctx, + struct trie_node *node, unsigned int level); +static int trie_write_node(struct squat_trie_build_context *ctx, + unsigned int level, struct trie_node *node); +static int squat_trie_build_flush(struct squat_trie_build_context *ctx); static int chr_8bit_cmp(const void *_key, const void *_chr) { @@ -330,6 +360,9 @@ static void trie_close(struct squat_trie *trie) { + if (trie->file_lock != NULL) + file_lock_free(&trie->file_lock); + if (trie->mmap_base != NULL) { if (munmap(trie->mmap_base, trie->mmap_size) < 0) squat_trie_set_syscall_error(trie, "munmap()"); @@ -347,10 +380,31 @@ trie->corrupted = FALSE; } +static int trie_map_check_header(struct squat_trie *trie, + const struct squat_trie_header *hdr) +{ + if (hdr->version != SQUAT_TRIE_VERSION) + return -1; + + if (hdr->used_file_size > trie->mmap_size) { + squat_trie_set_corrupted(trie, "used_file_size too large"); + return -1; + } + if (hdr->root_offset > trie->mmap_size || + hdr->root_offset < sizeof(*hdr)) { + squat_trie_set_corrupted(trie, "invalid root_offset"); + return -1; + } + + return 0; +} + static int trie_map(struct squat_trie *trie) { struct stat st; + i_assert(trie->lock_count > 0); + if (fstat(trie->fd, &st) < 0) { squat_trie_set_syscall_error(trie, "fstat()"); return -1; @@ -374,11 +428,13 @@ } trie->hdr = trie->mmap_base; + if (trie_map_check_header(trie, trie->hdr) < 0) + return -1; + if (trie_map_node(trie, trie->hdr->root_offset, 1, &trie->root) < 0) { trie_close(trie); return 0; } - trie->node_count = trie->hdr->node_count; return 1; } @@ -396,7 +452,8 @@ return trie_map(trie); } -struct squat_trie *squat_trie_open(const char *path) +struct squat_trie * +squat_trie_open(const char *path, enum file_lock_method lock_method) { struct squat_trie *trie; const char *uidlist_path; @@ -404,9 +461,12 @@ trie = i_new(struct squat_trie, 1); trie->fd = -1; trie->filepath = i_strdup(path); + trie->lock_method = lock_method; + trie->buf = buffer_create_dynamic(default_pool, 1024); + uidlist_path = t_strconcat(path, ".uids", NULL); trie->uidlist = squat_uidlist_init(trie, uidlist_path); - trie->buf = buffer_create_dynamic(default_pool, 1024); + (void)trie_open(trie); return trie; } @@ -419,6 +479,47 @@ i_free(trie); } +int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *uid_r) +{ + return squat_uidlist_get_last_uid(trie->uidlist, uid_r); +} + +int squat_trie_lock(struct squat_trie *trie, int lock_type) +{ + int ret; + + i_assert(lock_type == F_RDLCK || lock_type == F_WRLCK); + + if (trie->lock_count > 0) { + /* read lock -> write lock would deadlock */ + i_assert(trie->lock_type == lock_type || lock_type == F_RDLCK); + + trie->lock_count++; + return 1; + } + + i_assert(trie->file_lock == NULL); + ret = file_wait_lock(trie->fd, trie->filepath, lock_type, + trie->lock_method, SQUAT_TRIE_LOCK_TIMEOUT, + &trie->file_lock); + if (ret <= 0) + return ret; + + trie->lock_count++; + trie->lock_type = lock_type; + return 1; +} + +void squat_trie_unlock(struct squat_trie *trie) +{ + i_assert(trie->lock_count > 0); + + if (--trie->lock_count > 0) + return; + + file_unlock(&trie->file_lock); +} + static struct trie_node * node_alloc(uint16_t chr, unsigned int level) { @@ -528,9 +629,11 @@ } static int -trie_insert_node(struct squat_trie *trie, struct trie_node **parent, +trie_insert_node(struct squat_trie_build_context *ctx, + struct trie_node **parent, const uint16_t *data, uint32_t uid, unsigned int level) { + struct squat_trie *trie = ctx->trie; struct trie_node *node = *parent; uint32_t char_idx, idx_base_offset; bool modified = FALSE; @@ -540,7 +643,7 @@ unsigned int count; if (node == NULL) { - trie->node_count++; + ctx->node_count++; node = *parent = node_alloc(*data, level); char_idx = 0; count = 1; @@ -568,7 +671,7 @@ unsigned int count; if (node == NULL) { - trie->node_count++; + ctx->node_count++; node = *parent = node_alloc(*data, level); char_idx = 0; count = 1; @@ -609,7 +712,7 @@ &children[char_idx]) < 0) return -1; } - ret = trie_insert_node(trie, &children[char_idx], + ret = trie_insert_node(ctx, &children[char_idx], data + 1, uid, level + 1); if (ret < 0) return -1; @@ -705,28 +808,69 @@ return TRUE; } -int squat_trie_add(struct squat_trie *trie, uint32_t uid, - const void *data, size_t size) +struct squat_trie_build_context * +squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r) +{ + struct squat_trie_build_context *ctx; + + ctx = i_new(struct squat_trie_build_context, 1); + ctx->trie = trie; + + if (squat_trie_lock(trie, F_WRLCK) <= 0) + ctx->failed = TRUE; + else { + ctx->locked = TRUE; + ctx->node_count = trie->hdr->node_count; + + if (squat_uidlist_get_last_uid(trie->uidlist, last_uid_r) < 0) + ctx->failed = TRUE; + } + + if (ctx->failed) + *last_uid_r = 0; + return ctx; +} + +int squat_trie_build_deinit(struct squat_trie_build_context *ctx) +{ + int ret = ctx->failed ? -1 : 0; + + if (ret == 0) + ret = squat_trie_build_flush(ctx); + + if (ctx->locked) + squat_trie_unlock(ctx->trie); + + i_free(ctx); + return ret; +} + +int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid, + const void *data, size_t size) { const uint16_t *str; uint16_t buf[(BLOCK_SIZE-1)*2]; unsigned int i, tmp_size; + if (ctx->failed) + return -1; + t_push(); - str = data_normalize(data, size, trie->buf); + str = data_normalize(data, size, ctx->trie->buf); - if (uid == trie->prev_uid) { + if (uid == ctx->prev_uid) { /* @UNSAFE: continue from last block */ - memcpy(buf, trie->prev_added, - sizeof(buf[0]) * trie->prev_added_size); + memcpy(buf, ctx->prev_added, + sizeof(buf[0]) * ctx->prev_added_size); tmp_size = I_MIN(size, BLOCK_SIZE-1); - memcpy(buf + trie->prev_added_size, str, + memcpy(buf + ctx->prev_added_size, str, sizeof(buf[0]) * tmp_size); - tmp_size += trie->prev_added_size; + tmp_size += ctx->prev_added_size; for (i = 0; i + BLOCK_SIZE <= tmp_size; i++) { if (block_want_add(buf+i)) { - if (trie_insert_node(trie, &trie->root, + if (trie_insert_node(ctx, + &ctx->trie->root, buf + i, uid, 1) < 0) { t_pop(); return -1; @@ -735,9 +879,9 @@ } if (size < BLOCK_SIZE) { - trie->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1); - memcpy(trie->prev_added, buf + i, - sizeof(buf[0]) * trie->prev_added_size); + ctx->prev_added_size = I_MIN(tmp_size, BLOCK_SIZE-1); + memcpy(ctx->prev_added, buf + i, + sizeof(buf[0]) * ctx->prev_added_size); t_pop(); return 0; } @@ -745,7 +889,7 @@ for (i = 0; i + BLOCK_SIZE <= size; i++) { if (block_want_add(str+i)) { - if (trie_insert_node(trie, &trie->root, + if (trie_insert_node(ctx, &ctx->trie->root, str + i, uid, 1) < 0) { t_pop(); return -1; @@ -753,9 +897,9 @@ } } - trie->prev_added_size = I_MIN(size, BLOCK_SIZE-1); - memcpy(trie->prev_added, str + i, - sizeof(trie->prev_added[0]) * trie->prev_added_size); + ctx->prev_added_size = I_MIN(size, BLOCK_SIZE-1); + memcpy(ctx->prev_added, str + i, + sizeof(ctx->prev_added[0]) * ctx->prev_added_size); t_pop(); return 0; } @@ -815,31 +959,32 @@ return 0; } -static void node_pack_leaf(struct squat_trie *trie, struct trie_node *node) +static void node_pack_leaf(buffer_t *buf, struct trie_node *node) { uint8_t *chars8 = NODE_CHARS8(node); uint16_t *chars16 = NODE_CHARS16(node); uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node); uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node); - buffer_set_used_size(trie->buf, 0); - _squat_trie_pack_num(trie->buf, (node->chars_8bit_count << 1) | + buffer_set_used_size(buf, 0); + _squat_trie_pack_num(buf, (node->chars_8bit_count << 1) | (node->chars_16bit_count > 0 ? 1 : 0)); - buffer_append(trie->buf, chars8, node->chars_8bit_count); - buffer_append(trie->buf, idx8, sizeof(*idx8) * node->chars_8bit_count); + buffer_append(buf, chars8, node->chars_8bit_count); + buffer_append(buf, idx8, sizeof(*idx8) * node->chars_8bit_count); if (node->chars_16bit_count > 0) { - _squat_trie_pack_num(trie->buf, node->chars_16bit_count); - buffer_append(trie->buf, chars16, + _squat_trie_pack_num(buf, node->chars_16bit_count); + buffer_append(buf, chars16, sizeof(*chars16) * node->chars_16bit_count); - buffer_append(trie->buf, idx16, + buffer_append(buf, idx16, sizeof(*idx16) * node->chars_16bit_count); } } static int -trie_write_node_children(struct squat_trie *trie, unsigned int level, - struct trie_node **children, unsigned int count) +trie_write_node_children(struct squat_trie_build_context *ctx, + unsigned int level, struct trie_node **children, + unsigned int count) { unsigned int i; size_t child_idx; @@ -847,63 +992,66 @@ for (i = 0; i < count; i++) { child_idx = POINTER_CAST_TO(children[i], size_t); if ((child_idx & 1) == 0) { - if (trie_write_node(trie, level, children[i]) < 0) + if (trie_write_node(ctx, level, children[i]) < 0) return -1; } } return 0; } -static int trie_write_node(struct squat_trie *trie, unsigned int level, - struct trie_node *node) +static int trie_write_node(struct squat_trie_build_context *ctx, + unsigned int level, struct trie_node *node) { + struct squat_trie *trie = ctx->trie; uoff_t offset; if (level < BLOCK_SIZE) { struct trie_node **children8 = NODE_CHILDREN8(node); struct trie_node **children16 = NODE_CHILDREN16(node); - trie_write_node_children(trie, level + 1, + trie_write_node_children(ctx, level + 1, children8, node->chars_8bit_count); - trie_write_node_children(trie, level + 1, + trie_write_node_children(ctx, level + 1, children16, node->chars_16bit_count); node_pack(trie->buf, node); } else { if (node_leaf_finish(trie, node) < 0) return -1; - node_pack_leaf(trie, node); + node_pack_leaf(trie->buf, node); } - offset = trie->output->offset; + offset = ctx->output->offset; if ((offset & 1) != 0) { - o_stream_send(trie->output, "", 1); + o_stream_send(ctx->output, "", 1); offset++; } if (node->resized && node->orig_size != trie->buf->used) { /* append to end of file. the parent node is written later. */ node->file_offset = offset; - o_stream_send(trie->output, trie->buf->data, trie->buf->used); + o_stream_send(ctx->output, trie->buf->data, trie->buf->used); - trie->deleted_space += node->orig_size; + ctx->deleted_space += node->orig_size; } else if (node->modified) { /* overwrite node's contents */ i_assert(node->file_offset != 0); i_assert(trie->buf->used <= node->orig_size); /* FIXME: write only the indexes if !node->resized */ - o_stream_seek(trie->output, node->file_offset); - o_stream_send(trie->output, trie->buf->data, trie->buf->used); - o_stream_seek(trie->output, offset); + o_stream_seek(ctx->output, node->file_offset); + o_stream_send(ctx->output, trie->buf->data, trie->buf->used); + o_stream_seek(ctx->output, offset); - trie->deleted_space += trie->buf->used - node->orig_size; + ctx->deleted_space += trie->buf->used - node->orig_size; } return 0; } -static int trie_nodes_write(struct squat_trie *trie, uint32_t *uidvalidity_r) +static int +trie_nodes_write(struct squat_trie_build_context *ctx, uint32_t *uidvalidity_r) { + struct squat_trie *trie = ctx->trie; struct squat_trie_header hdr; if (trie->fd == -1) { @@ -915,8 +1063,8 @@ } memset(&hdr, 0, sizeof(hdr)); + hdr.version = SQUAT_TRIE_VERSION; hdr.uidvalidity = 0; // FIXME - hdr.header_size = sizeof(hdr); } else { hdr = *trie->hdr; if (lseek(trie->fd, hdr.used_file_size, SEEK_SET) < 0) { @@ -925,23 +1073,23 @@ } } - trie->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE); + ctx->output = o_stream_create_file(trie->fd, default_pool, 0, FALSE); if (hdr.used_file_size == 0) - o_stream_send(trie->output, &hdr, sizeof(hdr)); + o_stream_send(ctx->output, &hdr, sizeof(hdr)); - trie->deleted_space = 0; - if (trie_write_node(trie, 1, trie->root) < 0) + ctx->deleted_space = 0; + if (trie_write_node(ctx, 1, trie->root) < 0) return -1; /* update the header */ hdr.root_offset = trie->root->file_offset; - hdr.used_file_size = trie->output->offset; - hdr.deleted_space += trie->deleted_space; - hdr.node_count = trie->node_count; - o_stream_seek(trie->output, 0); - o_stream_send(trie->output, &hdr, sizeof(hdr)); + hdr.used_file_size = ctx->output->offset; + hdr.deleted_space += ctx->deleted_space; + hdr.node_count = ctx->node_count; + o_stream_seek(ctx->output, 0); + o_stream_send(ctx->output, &hdr, sizeof(hdr)); - o_stream_destroy(&trie->output); + o_stream_destroy(&ctx->output); *uidvalidity_r = hdr.uidvalidity; return 0; } @@ -963,8 +1111,9 @@ current_message_count); } -int squat_trie_flush(struct squat_trie *trie) +static int squat_trie_build_flush(struct squat_trie_build_context *ctx) { + struct squat_trie *trie = ctx->trie; uint32_t uidvalidity; if (trie->root == NULL) { @@ -972,7 +1121,7 @@ return 0; } - if (trie_nodes_write(trie, &uidvalidity) < 0) + if (trie_nodes_write(ctx, &uidvalidity) < 0) return -1; if (squat_uidlist_flush(trie->uidlist, uidvalidity) < 0) return -1; @@ -1031,7 +1180,7 @@ } static int -squat_trie_compress_children(struct squat_trie *trie, +squat_trie_compress_children(struct squat_trie_compress_context *ctx, struct trie_node **children, unsigned int count, unsigned int level) { @@ -1046,10 +1195,10 @@ i_assert((child_idx & 1) != 0); child_idx &= ~1; - if (trie_map_node(trie, child_idx, level, &child_node) < 0) + if (trie_map_node(ctx->trie, child_idx, level, &child_node) < 0) return -1; - ret = squat_trie_compress_node(trie, child_node, level); + ret = squat_trie_compress_node(ctx, child_node, level); if (child_node->file_offset != 0) children[i] = POINTER_CAST(child_node->file_offset | 1); else { @@ -1064,8 +1213,9 @@ return need_char_compress ? 0 : 1; } -static int squat_trie_compress_leaf_uidlist(struct squat_trie *trie, - struct trie_node *node) +static int +squat_trie_compress_leaf_uidlist(struct squat_trie_compress_context *ctx, + struct trie_node *node) { uint32_t *idx8 = (uint32_t *)NODE_CHILDREN8(node); uint32_t *idx16 = (uint32_t *)NODE_CHILDREN16(node); @@ -1074,7 +1224,7 @@ bool compress_chars = FALSE; for (i = 0; i < node->chars_8bit_count; i++) { - ret = squat_uidlist_compress_next(trie->compress_ctx, &idx8[i]); + ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx8[i]); if (ret < 0) return -1; if (ret == 0) { @@ -1087,8 +1237,7 @@ compress_chars = FALSE; } for (i = 0; i < node->chars_16bit_count; i++) { - ret = squat_uidlist_compress_next(trie->compress_ctx, - &idx16[i]); + ret = squat_uidlist_compress_next(ctx->uidlist_ctx, &idx16[i]); if (ret < 0) return -1; if (ret == 0) { @@ -1104,34 +1253,35 @@ } static int -squat_trie_compress_node(struct squat_trie *trie, struct trie_node *node, - unsigned int level) +squat_trie_compress_node(struct squat_trie_compress_context *ctx, + struct trie_node *node, unsigned int level) { + struct squat_trie *trie = ctx->trie; int ret; if (level == BLOCK_SIZE) { - if (squat_trie_compress_leaf_uidlist(trie, node)) + if (squat_trie_compress_leaf_uidlist(ctx, node)) return -1; if (node->chars_8bit_count == 0 && node->chars_16bit_count == 0) { /* everything expunged */ - trie->node_count--; + ctx->node_count--; node->file_offset = 0; return 0; } - node_pack_leaf(trie, node); + node_pack_leaf(trie->buf, node); } else { struct trie_node **children8 = NODE_CHILDREN8(node); struct trie_node **children16 = NODE_CHILDREN16(node); - if ((ret = squat_trie_compress_children(trie, children8, + if ((ret = squat_trie_compress_children(ctx, children8, node->chars_8bit_count, level + 1)) < 0) return -1; if (ret == 0) squat_trie_compress_chars8(node); - if ((ret = squat_trie_compress_children(trie, children16, + if ((ret = squat_trie_compress_children(ctx, children16, node->chars_16bit_count, level + 1)) < 0) return -1; @@ -1141,80 +1291,104 @@ if (node->chars_8bit_count == 0 && node->chars_16bit_count == 0) { /* everything expunged */ - trie->node_count--; + ctx->node_count--; node->file_offset = 0; return 0; } node_pack(trie->buf, node); } - if ((trie->output->offset & 1) != 0) - o_stream_send(trie->output, "", 1); - node->file_offset = trie->output->offset; + if ((ctx->output->offset & 1) != 0) + o_stream_send(ctx->output, "", 1); + node->file_offset = ctx->output->offset; + + o_stream_send(ctx->output, trie->buf->data, trie->buf->used); + return 0; +} + +static int squat_trie_compress_init(struct squat_trie_compress_context *ctx, + struct squat_trie *trie) +{ + struct squat_trie_header hdr; + int fd; + + ctx->tmp_path = t_strconcat(trie->filepath, ".tmp", NULL); + fd = open(ctx->tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600); + if (fd == -1) { + i_error("open(%s, O_CREAT) failed: %m", ctx->tmp_path); + return -1; + } - o_stream_send(trie->output, trie->buf->data, trie->buf->used); + memset(ctx, 0, sizeof(*ctx)); + ctx->trie = trie; + ctx->output = o_stream_create_file(fd, default_pool, 0, TRUE); + ctx->node_count = trie->hdr->node_count; + + /* write a dummy header first */ + memset(&hdr, 0, sizeof(hdr)); + o_stream_send(ctx->output, &hdr, sizeof(hdr)); return 0; } +static void +squat_trie_compress_write_header(struct squat_trie_compress_context *ctx, + struct trie_node *root_node) +{ + struct squat_trie_header hdr; + + memset(&hdr, 0, sizeof(hdr)); + hdr.version = SQUAT_TRIE_VERSION; + hdr.uidvalidity = ctx->trie->hdr->uidvalidity; + hdr.root_offset = root_node->file_offset; + hdr.used_file_size = ctx->output->offset; + hdr.node_count = ctx->node_count; + + o_stream_seek(ctx->output, 0); + o_stream_send(ctx->output, &hdr, sizeof(hdr)); +} + int squat_trie_compress(struct squat_trie *trie, const ARRAY_TYPE(seq_range) *existing_uids) { + struct squat_trie_compress_context ctx; struct trie_node *node; - struct squat_trie_header hdr; - const char *tmp_path; - int fd, ret; + int ret; - if (trie_map_node(trie, trie->hdr->root_offset, 1, &node) < 0) + if (squat_trie_lock(trie, F_WRLCK) <= 0) return -1; - tmp_path = t_strconcat(trie->filepath, ".tmp", NULL); - fd = open(tmp_path, O_RDWR | O_CREAT | O_TRUNC, 0600); - if (fd == -1) { - i_error("open(%s, O_CREAT) failed: %m", tmp_path); - i_free(node); + if (squat_trie_compress_init(&ctx, trie) < 0) { + squat_trie_unlock(trie); return -1; } - i_assert(trie->output == NULL); - trie->output = o_stream_create_file(fd, default_pool, 0, TRUE); - - /* write a dummy header first */ - memset(&hdr, 0, sizeof(hdr)); - o_stream_send(trie->output, &hdr, sizeof(hdr)); - - /* do the compression */ - trie->compress_ctx = - squat_uidlist_compress_begin(trie->uidlist, existing_uids); - ret = squat_trie_compress_node(trie, node, 1); + ret = trie_map_node(trie, trie->hdr->root_offset, 1, &node); + if (ret == 0) { + /* do the compression */ + ctx.uidlist_ctx = squat_uidlist_compress_begin(trie->uidlist, + existing_uids); + if ((ret = squat_trie_compress_node(&ctx, node, 1)) < 0) + squat_uidlist_compress_rollback(&ctx.uidlist_ctx); + else { + ret = squat_uidlist_compress_commit(&ctx.uidlist_ctx); - /* write the header */ - hdr.uidvalidity = trie->hdr->uidvalidity; - hdr.header_size = sizeof(hdr); - hdr.root_offset = node->file_offset; - hdr.used_file_size = trie->output->offset; - hdr.node_count = trie->node_count; - o_stream_seek(trie->output, 0); - o_stream_send(trie->output, &hdr, sizeof(hdr)); - - i_free(node); - - /* finish the compression */ - if (ret == 0) - ret = squat_uidlist_compress_commit(&trie->compress_ctx); - else - squat_uidlist_compress_rollback(&trie->compress_ctx); + squat_trie_compress_write_header(&ctx, node); + } + i_free(node); + } if (ret == 0) { - if (rename(tmp_path, trie->filepath) < 0) { + if (rename(ctx.tmp_path, trie->filepath) < 0) { i_error("rename(%s, %s) failed: %m", - tmp_path, trie->filepath); + ctx.tmp_path, trie->filepath); ret = -1; } } - o_stream_destroy(&trie->output); + o_stream_destroy(&ctx.output); + squat_trie_unlock(trie); if (ret < 0) - (void)unlink(tmp_path); + (void)unlink(ctx.tmp_path); else { trie_close(trie); if (trie_open(trie) <= 0) @@ -1245,17 +1419,37 @@ return trie->mmap_size; } -int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result, - const char *str) +static int squat_trie_lookup_init(struct squat_trie *trie, const char *str, + const uint16_t **data_r, unsigned int *len_r) { const uint16_t *data; unsigned int len = strlen(str); - uint32_t list; if (len < BLOCK_SIZE) return -1; data = data_normalize(str, len, trie->buf); + + /* skip the blocks that can't exist */ + while (!block_want_add(data + len - BLOCK_SIZE)) { + if (--len < BLOCK_SIZE) + return -1; + } + + if (squat_trie_lock(trie, F_RDLCK) <= 0) + return -1; + + *data_r = data; + *len_r = len; + return 0; +} + +static int +squat_trie_lookup_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result, + const uint16_t *data, unsigned int len) +{ + uint32_t list; + list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1); if (list == 0) return 0; @@ -1266,6 +1460,49 @@ } while (len > BLOCK_SIZE) { len--; + + if (!block_want_add(data + len - BLOCK_SIZE)) + continue; + + list = trie_lookup_node(trie, trie->root, + data + len - BLOCK_SIZE, 1); + if (list == 0) { + array_clear(result); + return 0; + } + if (squat_uidlist_filter(trie->uidlist, list, result) < 0) { + squat_trie_set_corrupted(trie, "uidlist offset broken"); + return -1; + } + } + return array_count(result) > 0 ? 1 : 0; +} + +int squat_trie_lookup(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result, + const char *str) +{ + const uint16_t *data; + unsigned int len; + int ret; + + if (squat_trie_lookup_init(trie, str, &data, &len) < 0) + return -1; + + ret = squat_trie_lookup_locked(trie, result, data, len); + squat_trie_unlock(trie); + return ret; +} + +static int +squat_trie_filter_locked(struct squat_trie *trie, ARRAY_TYPE(seq_range) *result, + const uint16_t *data, unsigned int len) +{ + uint32_t list; + + for (; len >= BLOCK_SIZE; len--) { + if (!block_want_add(data + len - BLOCK_SIZE)) + continue; + list = trie_lookup_node(trie, trie->root, data + len - BLOCK_SIZE, 1); if (list == 0) { @@ -1284,27 +1521,14 @@ const char *str) { const uint16_t *data; - unsigned int len = strlen(str); - uint32_t list; - - if (len < BLOCK_SIZE) - return -1; + unsigned int len; + int ret; - data = data_normalize(str, len, trie->buf); - while (len >= BLOCK_SIZE) { - list = trie_lookup_node(trie, trie->root, - data + len - BLOCK_SIZE, 1); - if (list == 0) { - array_clear(result); - return 0; - } - if (squat_uidlist_filter(trie->uidlist, list, result) < 0) { - squat_trie_set_corrupted(trie, "uidlist offset broken"); - return -1; - } - len--; - } - return array_count(result) > 0 ? 1 : 0; + if (squat_trie_lookup_init(trie, str, &data, &len) < 0) + return -1; + ret = squat_trie_filter_locked(trie, result, data, len); + squat_trie_unlock(trie); + return ret; } struct squat_uidlist *_squat_trie_get_uidlist(struct squat_trie *trie)
--- a/src/plugins/fts-squat/squat-trie.h Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/squat-trie.h Wed Dec 06 17:45:46 2006 +0200 @@ -1,14 +1,25 @@ #ifndef __SQUAT_TRIE_H #define __SQUAT_TRIE_H +enum file_lock_method; + #include "seq-range-array.h" -struct squat_trie *squat_trie_open(const char *path); +struct squat_trie * +squat_trie_open(const char *path, enum file_lock_method lock_method); void squat_trie_close(struct squat_trie *trie); -int squat_trie_add(struct squat_trie *trie, uint32_t uid, - const void *data, size_t size); -int squat_trie_flush(struct squat_trie *trie); +int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *last_uid_r); + +int squat_trie_lock(struct squat_trie *trie, int lock_type); +void squat_trie_unlock(struct squat_trie *trie); + +struct squat_trie_build_context * +squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r); +int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid, + const void *data, size_t size); +int squat_trie_build_deinit(struct squat_trie_build_context *ctx); + int squat_trie_compress(struct squat_trie *trie, const ARRAY_TYPE(seq_range) *existing_uids);
--- a/src/plugins/fts-squat/squat-uidlist.c Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/squat-uidlist.c Wed Dec 06 17:45:46 2006 +0200 @@ -28,7 +28,8 @@ uint32_t uid_max; uint32_t uid_count; - uint32_t uids_expunged; + uint8_t uids_expunged; /* updated without locking */ + uint8_t unused[3]; uint32_t node_count; }; @@ -192,6 +193,12 @@ i_free(uidlist); } +int squat_uidlist_get_last_uid(struct squat_uidlist *uidlist, uint32_t *uid_r) +{ + *uid_r = uidlist->hdr.uid_max; + return 0; +} + int squat_uidlist_add(struct squat_uidlist *uidlist, uint32_t *_uid_list_idx, uint32_t uid) { @@ -518,14 +525,15 @@ int squat_uidlist_mark_having_expunges(struct squat_uidlist *uidlist, bool update_disk) { + uint8_t flag = 1; + size_t offset; + uidlist->check_expunges = TRUE; if (update_disk) { - uidlist->hdr.uids_expunged = TRUE; - - // FIXME: make sure uidlist.hdr is in updated state - if (pwrite_full(uidlist->fd, &uidlist->hdr, - sizeof(uidlist->hdr), 0) < 0) { + /* NOTE: we're writing this flag without locking */ + offset = offsetof(struct squat_uidlist_header, uids_expunged); + if (pwrite_full(uidlist->fd, &flag, sizeof(flag), offset) < 0) { squat_uidlist_set_syscall_error(uidlist, "pwrite_full()"); return -1;
--- a/src/plugins/fts-squat/squat-uidlist.h Wed Dec 06 17:45:15 2006 +0200 +++ b/src/plugins/fts-squat/squat-uidlist.h Wed Dec 06 17:45:46 2006 +0200 @@ -10,6 +10,8 @@ squat_uidlist_init(struct squat_trie *trie, const char *path); void squat_uidlist_deinit(struct squat_uidlist *uidlist); +int squat_uidlist_get_last_uid(struct squat_uidlist *uidlist, uint32_t *uid_r); + /* Add new UID to given UID list. The uid_list_idx is updated to contain the new list index. It must be put through _finish_list() before it's actually written to disk. */