Mercurial > dovecot > original-hg > dovecot-1.2
changeset 6905:8c779f3e16be HEAD
Fixes to FTS handling.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 03 Dec 2007 11:25:49 +0200 |
parents | 275d22eb25ba |
children | a201579e585d |
files | src/plugins/fts-squat/squat-trie-private.h src/plugins/fts-squat/squat-trie.c src/plugins/fts/fts-api.c src/plugins/fts/fts-storage.c src/plugins/fts/fts-storage.h |
diffstat | 5 files changed, 115 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-squat/squat-trie-private.h Mon Dec 03 11:02:44 2007 +0200 +++ b/src/plugins/fts-squat/squat-trie-private.h Mon Dec 03 11:25:49 2007 +0200 @@ -20,6 +20,10 @@ uint32_t root_unused_uids; uint32_t root_next_uid; uint32_t root_uidlist_idx; + + uint8_t partial_len; + uint8_t full_len; + uint8_t normalize_map[256]; }; /* @@ -119,7 +123,7 @@ void *mmap_base; size_t mmap_size; - unsigned char normalize_map[256]; + unsigned char default_normalize_map[256]; unsigned int corrupted:1; };
--- a/src/plugins/fts-squat/squat-trie.c Mon Dec 03 11:02:44 2007 +0200 +++ b/src/plugins/fts-squat/squat-trie.c Mon Dec 03 11:25:49 2007 +0200 @@ -17,10 +17,12 @@ #include <time.h> #include <sys/mman.h> +#define DEFAULT_NORMALIZE_MAP_CHARS \ + "EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&" +#define DEFAULT_PARTIAL_LEN 4 +#define DEFAULT_FULL_LEN 4 + #define MAX_FAST_LEVEL 3 -#define MAX_PARTIAL_LEN 4 -#define MAX_FULL_LEN 4 - #define SEQUENTIAL_COUNT 46 struct squat_trie_build_context { @@ -62,33 +64,34 @@ static void squat_trie_normalize_map_build(struct squat_trie *trie) { static unsigned char valid_chars[] = - "EOTIRSACDNLMVUGPHBFWYXKJQZ0123456789@.-+#$%_&"; + DEFAULT_NORMALIZE_MAP_CHARS; unsigned int i, j; - memset(trie->normalize_map, 0, sizeof(trie->normalize_map)); + memset(trie->default_normalize_map, 0, + sizeof(trie->default_normalize_map)); #if 1 for (i = 0, j = 1; i < sizeof(valid_chars)-1; i++) { unsigned char chr = valid_chars[i]; if (chr >= 'A' && chr <= 'Z') - trie->normalize_map[chr-'A'+'a'] = j; - trie->normalize_map[chr] = j++; + trie->default_normalize_map[chr-'A'+'a'] = j; + trie->default_normalize_map[chr] = j++; } i_assert(j <= SEQUENTIAL_COUNT); for (i = 128; i < 256; i++) - trie->normalize_map[i] = j++; + trie->default_normalize_map[i] = j++; #else for (i = 0; i < sizeof(valid_chars)-1; i++) { unsigned char chr = valid_chars[i]; if (chr >= 'A' && chr <= 'Z') - trie->normalize_map[chr-'A'+'a'] = chr; - trie->normalize_map[chr] = chr; + trie->default_normalize_map[chr-'A'+'a'] = chr; + trie->default_normalize_map[chr] = chr; } for (i = 128; i < 256; i++) - trie->normalize_map[i] = i_toupper(i); + trie->default_normalize_map[i] = i_toupper(i); #endif } @@ -113,6 +116,7 @@ static void squat_trie_clear(struct squat_trie *trie) { + trie->corrupted = FALSE; node_free(trie, &trie->root); memset(&trie->root, 0, sizeof(trie->root)); memset(&trie->hdr, 0, sizeof(trie->hdr)); @@ -166,6 +170,13 @@ trie->hdr.version = SQUAT_TRIE_VERSION; trie->hdr.indexid = ioloop_time; trie->hdr.uidvalidity = trie->uidvalidity; + trie->hdr.partial_len = DEFAULT_PARTIAL_LEN; + trie->hdr.full_len = DEFAULT_FULL_LEN; + + i_assert(sizeof(trie->hdr.normalize_map) == + sizeof(trie->default_normalize_map)); + memcpy(trie->hdr.normalize_map, trie->default_normalize_map, + sizeof(trie->hdr.normalize_map)); } static int squat_trie_open_fd(struct squat_trie *trie) @@ -694,16 +705,15 @@ squat_build_word(struct squat_trie *trie, uint32_t uid, const unsigned char *data, unsigned int size) { -#if MAX_PARTIAL_LEN > 0 unsigned int i; for (i = size - 1; i > 0; i--) { if (squat_build_add(trie, uid, data + i, - I_MIN(MAX_PARTIAL_LEN, size - i)) < 0) + I_MIN(trie->hdr.partial_len, size - i)) < 0) return -1; } -#endif - return squat_build_add(trie, uid, data, I_MIN(size, MAX_FULL_LEN)); + return squat_build_add(trie, uid, data, + I_MIN(size, trie->hdr.full_len)); } static unsigned char * @@ -715,7 +725,7 @@ dest = t_malloc(size); for (i = 0; i < size; i++) - dest[i] = trie->normalize_map[data[i]]; + dest[i] = trie->hdr.normalize_map[data[i]]; return dest; } @@ -862,7 +872,7 @@ ctx = i_new(struct squat_trie_iterate_context, 1); ctx->trie = trie; ctx->cur.node = &trie->root; - i_array_init(&ctx->parents, MAX_FULL_LEN*2); + i_array_init(&ctx->parents, trie->hdr.partial_len*2); return ctx; } @@ -981,6 +991,23 @@ return squat_uidlist_rebuild_finish(ctx, ret < 0); } +static bool squat_trie_check_header(struct squat_trie *trie) +{ + if (trie->hdr.version != SQUAT_TRIE_VERSION || + trie->hdr.uidvalidity != trie->uidvalidity) + return FALSE; + + if (trie->hdr.partial_len > trie->hdr.full_len) { + i_error("Corrupted %s: partial len > full len", trie->path); + return FALSE; + } + if (trie->hdr.full_len == 0) { + i_error("Corrupted %s: full len=0", trie->path); + return FALSE; + } + return TRUE; +} + static int squat_trie_map_header(struct squat_trie *trie) { if (trie->locked_file_size == 0) { @@ -1008,8 +1035,7 @@ if (trie->hdr.root_offset == 0) return 0; - if (trie->hdr.version != SQUAT_TRIE_VERSION || - trie->hdr.uidvalidity != trie->uidvalidity) { + if (!squat_trie_check_header(trie)) { squat_trie_delete(trie); squat_trie_close(trie); squat_trie_header_init(trie); @@ -1082,7 +1108,7 @@ ctx->trie = trie; ctx->first_uid = trie->root.next_uid; - *last_uid_r = I_MAX(trie->root.next_uid/2, 1) - 1; + *last_uid_r = I_MAX((trie->root.next_uid+1)/2, 1) - 1; *ctx_r = ctx; return 0; } @@ -1131,6 +1157,8 @@ ret = squat_write_nodes(ctx); ctx->output = NULL; + if (trie->corrupted) + ret = -1; if (ret == 0) { trie->hdr.used_file_size = output->offset; o_stream_seek(output, 0); @@ -1194,7 +1222,7 @@ return -1; } - *last_uid_r = I_MAX(trie->root.next_uid/2, 1) - 1; + *last_uid_r = I_MAX((trie->root.next_uid+1)/2, 1) - 1; return 0; } @@ -1334,10 +1362,10 @@ int ret; do { - if (size <= MAX_PARTIAL_LEN) + if (size <= ctx->trie->hdr.partial_len) block_len = size; else - block_len = MAX_PARTIAL_LEN; + block_len = ctx->trie->hdr.partial_len; block = data + size - block_len; ret = squat_trie_lookup_data(ctx->trie, block, block_len, @@ -1357,10 +1385,30 @@ seq_range_array_remove_invert_range(ctx->maybe_uids, &ctx->tmp_uids2); } - } while (--size >= MAX_PARTIAL_LEN); + } while (--size >= ctx->trie->hdr.partial_len); return 1; } +static void squat_trie_add_unknown(struct squat_trie *trie, + ARRAY_TYPE(seq_range) *maybe_uids) +{ + struct seq_range *range, new_range; + unsigned int count; + uint32_t last_uid; + + last_uid = I_MAX((trie->root.next_uid+1)/2, 1) - 1; + + range = array_get_modifiable(maybe_uids, &count); + if (count > 0 && range[count-1].seq2 == last_uid) { + /* increase the range */ + range[count-1].seq2 = (uint32_t)-1; + } else { + new_range.seq1 = last_uid + 1; + new_range.seq2 = (uint32_t)-1; + array_append(maybe_uids, &new_range, 1); + } +} + int squat_trie_lookup(struct squat_trie *trie, const char *str, enum squat_index_type type, ARRAY_TYPE(seq_range) *definite_uids, @@ -1407,11 +1455,12 @@ i - start); } t_pop(); - return ret < 0 ? -1 : - (array_count(maybe_uids) > 0 ? 1 : 0); + squat_trie_add_unknown(trie, maybe_uids); + return ret < 0 ? -1 : 0; } - if (MAX_FULL_LEN > MAX_PARTIAL_LEN || size <= MAX_PARTIAL_LEN) { + if (size <= trie->hdr.partial_len || + trie->hdr.full_len > trie->hdr.partial_len) { ret = squat_trie_lookup_data(trie, data, size, &ctx.tmp_uids); if (ret > 0) { squat_trie_filter_type(type, &ctx.tmp_uids, @@ -1421,7 +1470,7 @@ array_clear(definite_uids); } - if (size <= MAX_PARTIAL_LEN || MAX_PARTIAL_LEN == 0) { + if (size <= trie->hdr.partial_len || trie->hdr.partial_len == 0) { /* we have the result */ array_clear(maybe_uids); } else { @@ -1429,9 +1478,8 @@ i - start); } t_pop(); - return ret < 0 ? -1 : - (array_count(maybe_uids) > 0 || - array_count(definite_uids) > 0 ? 1 : 0); + squat_trie_add_unknown(trie, maybe_uids); + return ret < 0 ? -1 : 0; } struct squat_uidlist *squat_trie_get_uidlist(struct squat_trie *trie)
--- a/src/plugins/fts/fts-api.c Mon Dec 03 11:02:44 2007 +0200 +++ b/src/plugins/fts/fts-api.c Mon Dec 03 11:25:49 2007 +0200 @@ -144,16 +144,8 @@ ret = backend->v.lookup(backend, key, flags & ~FTS_LOOKUP_FLAG_INVERT, definite_uids, maybe_uids); - if (ret <= 0) { - if (unlikely(ret < 0)) - return -1; - i_assert(array_count(definite_uids) == 0 && - array_count(maybe_uids) == 0); - } else { - i_assert(array_count(definite_uids) > 0 || - array_count(maybe_uids) > 0); - } - + if (unlikely(ret < 0)) + return -1; if ((flags & FTS_LOOKUP_FLAG_INVERT) != 0) fts_lookup_invert(definite_uids, maybe_uids); return 0; @@ -165,38 +157,35 @@ const ARRAY_TYPE(seq_range) *src_maybe, const ARRAY_TYPE(seq_range) *src_definite) { - const struct seq_range *dest, *src; - unsigned int i, dest_count, src_count; - uint32_t seq, seq2; - bool removals; + ARRAY_TYPE(seq_range) src_unwanted; + const struct seq_range *range; + struct seq_range new_range; + unsigned int i, count; + uint32_t seq; /* add/leave to dest_maybe if at least one list has maybe, and no lists have none */ - dest = array_get(dest_maybe, &dest_count); - src = array_get(dest_maybe, &src_count); + + t_push(); + /* create unwanted sequences list from both sources */ + t_array_init(&src_unwanted, 128); + new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1; + array_append(&src_unwanted, &new_range, 1); + seq_range_array_remove_seq_range(&src_unwanted, src_maybe); + seq_range_array_remove_seq_range(&src_unwanted, src_definite); /* drop unwanted uids */ - for (i = 0; i < dest_count; ) { - seq2 = dest[i].seq2; - removals = FALSE; - for (seq = dest[i].seq1; seq <= seq2; seq++) { - if (!seq_range_exists(src_definite, seq) && - !seq_range_exists(src_maybe, seq)) { - if (seq_range_array_remove(dest_maybe, seq)) - removals = TRUE; - } - } - if (!removals) - i++; - } + seq_range_array_remove_seq_range(dest_maybe, &src_unwanted); - /* add new uids */ - for (i = 0; i < src_count; i++) { - for (seq = src[i].seq1; seq <= src[i].seq2; seq++) { - if (seq_range_exists(dest_definite, seq)) + /* add uids that are in dest_definite and src_maybe lists */ + range = array_get(dest_definite, &count); + for (i = 0; i < count; i++) { + for (seq = range[i].seq1; seq <= range[i].seq2; seq++) { + if (seq_range_exists(src_maybe, seq)) seq_range_array_add(dest_maybe, 0, seq); } } + t_pop(); } int fts_backend_filter(struct fts_backend *backend, const char *key,
--- a/src/plugins/fts/fts-storage.c Mon Dec 03 11:02:44 2007 +0200 +++ b/src/plugins/fts/fts-storage.c Mon Dec 03 11:25:49 2007 +0200 @@ -67,13 +67,14 @@ return ret; } -static int fts_build_mail_flush_headers(struct fts_storage_build_context *ctx) +static int fts_build_mail_flush_headers(struct fts_storage_build_context *ctx, + bool root) { if (str_len(ctx->headers) == 0) return 0; if (fts_backend_build_more(ctx->build, ctx->uid, str_data(ctx->headers), - str_len(ctx->headers), TRUE) < 0) + str_len(ctx->headers), root) < 0) return -1; str_truncate(ctx->headers, 0); @@ -143,7 +144,8 @@ fts_build_mail_header(ctx, &block); else if (block.size == 0) { /* end of headers */ - ret = fts_build_mail_flush_headers(ctx); + bool root = raw_block.part->parent == NULL; + ret = fts_build_mail_flush_headers(ctx, root); if (ret < 0) break; } else { @@ -179,8 +181,6 @@ /* no new messages */ return 0; } - fctx->first_nonindexed_seq = seqset.seq1; - if (fctx->best_arg->type == SEARCH_HEADER) { /* we're not updating the index just for header lookups */ return 0; @@ -404,24 +404,6 @@ } } -static int -search_next_update_seq_finish(struct mail_search_context *ctx, - struct fts_search_context *fctx) -{ - struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box); - - if (fctx->first_nonindexed_seq == 0) { - /* everything was indexed. we're done */ - return 0; - } - if (ctx->seq < fctx->first_nonindexed_seq) { - /* scan the non-indexed messages */ - fctx->seqs_set = FALSE; - ctx->seq = fctx->first_nonindexed_seq - 1; - } - return fbox->module_ctx.super.search_next_update_seq(ctx); -} - static int fts_mailbox_search_next_update_seq(struct mail_search_context *ctx) { struct fts_mailbox *fbox = FTS_CONTEXT(ctx->transaction->box); @@ -451,8 +433,10 @@ /* use whichever is lower of definite/maybe */ if (fctx->definite_idx == def_count) { - if (fctx->maybe_idx == maybe_count) - return search_next_update_seq_finish(ctx, fctx); + if (fctx->maybe_idx == maybe_count) { + /* we're finished */ + return 0; + } use_maybe = TRUE; } else if (fctx->maybe_idx == maybe_count) { use_maybe = FALSE;
--- a/src/plugins/fts/fts-storage.h Mon Dec 03 11:02:44 2007 +0200 +++ b/src/plugins/fts/fts-storage.h Mon Dec 03 11:25:49 2007 +0200 @@ -20,7 +20,6 @@ ARRAY_TYPE(seq_range) definite_seqs, maybe_seqs; unsigned int definite_idx, maybe_idx; - uint32_t first_nonindexed_seq; struct fts_backend *build_backend; struct fts_storage_build_context *build_ctx;