Mercurial > dovecot > original-hg > dovecot-1.2
changeset 7210:f5f77a3ae203 HEAD
Initial code to support expunging from squat indexes, probably still buggy.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 03 Feb 2008 22:44:09 +0200 |
parents | d7d885b6dd46 |
children | 83fb5f84a270 |
files | src/plugins/fts-squat/fts-backend-squat.c src/plugins/fts-squat/squat-test.c src/plugins/fts-squat/squat-trie.c src/plugins/fts-squat/squat-trie.h src/plugins/fts-squat/squat-uidlist.c src/plugins/fts-squat/squat-uidlist.h |
diffstat | 6 files changed, 311 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-squat/fts-backend-squat.c Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/fts-backend-squat.c Sun Feb 03 22:44:09 2008 +0200 @@ -102,14 +102,45 @@ data, size); } +static int get_all_msg_uids(struct mailbox *box, ARRAY_TYPE(seq_range) *uids) +{ + struct mailbox_transaction_context *t; + struct mail_search_context *search_ctx; + struct mail_search_arg search_arg; + struct mail *mail; + int ret = 0; + + t = mailbox_transaction_begin(box, 0); + memset(&search_arg, 0, sizeof(search_arg)); + search_arg.type = SEARCH_ALL; + + mail = mail_alloc(t, 0, NULL); + search_ctx = mailbox_search_init(t, NULL, &search_arg, NULL); + while ((ret = mailbox_search_next(search_ctx, mail)) > 0) + seq_range_array_add(uids, 0, mail->uid); + if (mailbox_search_deinit(&search_ctx) < 0) + ret = -1; + mail_free(&mail); + (void)mailbox_transaction_commit(&t); + return ret; +} + static int fts_backend_squat_build_deinit(struct fts_backend_build_context *_ctx) { struct squat_fts_backend_build_context *ctx = (struct squat_fts_backend_build_context *)_ctx; + ARRAY_TYPE(seq_range) uids; int ret; - ret = squat_trie_build_deinit(&ctx->build_ctx); + i_array_init(&uids, 1024); + if (get_all_msg_uids(ctx->ctx.backend->box, &uids) < 0) + ret = squat_trie_build_deinit(&ctx->build_ctx, NULL); + else { + seq_range_array_invert(&uids, 1, (uint32_t)-2); + ret = squat_trie_build_deinit(&ctx->build_ctx, &uids); + } + array_free(&uids); i_free(ctx); return ret; }
--- a/src/plugins/fts-squat/squat-test.c Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-test.c Sun Feb 03 22:44:09 2008 +0200 @@ -129,7 +129,7 @@ } } buffer_free(&valid); - if (squat_trie_build_deinit(&build_ctx) < 0) + if (squat_trie_build_deinit(&build_ctx, NULL) < 0) ret = -1; if (ret < 0) { printf("build broken\n");
--- a/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:44:09 2008 +0200 @@ -43,6 +43,7 @@ struct squat_trie_iterate_node { struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; unsigned int idx; }; @@ -1013,8 +1014,16 @@ static int squat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx) { + struct squat_trie_iterate_node *nodes; + unsigned int i, count; int ret = ctx->failed ? -1 : 0; + if (array_is_created(&ctx->cur.shifts)) { + nodes = array_get_modifiable(&ctx->parents, &count); + for (i = 0; i < count; i++) + array_free(&nodes[i].shifts); + array_free(&ctx->cur.shifts); + } array_free(&ctx->parents); i_free(ctx); return ret; @@ -1033,37 +1042,233 @@ } static struct squat_node * -squat_trie_iterate_next(struct squat_trie_iterate_context *ctx) +squat_trie_iterate_next(struct squat_trie_iterate_context *ctx, + ARRAY_TYPE(seq_range) *shifts_r) { struct squat_trie_iterate_node *iter_nodes; struct squat_node *children; - unsigned int count; + unsigned int count, shift_count = 0; - while (ctx->cur.idx == ctx->cur.node->child_count) { + while (ctx->cur.idx == ctx->cur.node->child_count || + ctx->cur.node->uid_list_idx == 0) + { iter_nodes = array_get_modifiable(&ctx->parents, &count); if (count == 0) return NULL; + + if (array_is_created(&ctx->cur.shifts)) + array_free(&ctx->cur.shifts); ctx->cur = iter_nodes[count-1]; array_delete(&ctx->parents, count-1, 1); } - ctx->cur.idx++; + *shifts_r = ctx->cur.shifts; + if (array_is_created(&ctx->cur.shifts)) { + shift_count = array_count(&ctx->cur.shifts); + i_assert(shift_count > 0); + } + + children = NODE_CHILDREN_NODES(ctx->cur.node); + while (children[ctx->cur.idx++].uid_list_idx == 0) { + if (ctx->cur.idx == ctx->cur.node->child_count) { + /* no more non-empty children in this node */ + return squat_trie_iterate_next(ctx, shifts_r); + } + } array_append(&ctx->parents, &ctx->cur, 1); - children = NODE_CHILDREN_NODES(ctx->cur.node); ctx->cur.node = &children[ctx->cur.idx-1]; ctx->cur.idx = 0; + if (shift_count != 0) + i_array_init(&ctx->cur.shifts, shift_count); return ctx->cur.node; } +static void +squat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr, + ARRAY_TYPE(seq_range) *child_shifts, + ARRAY_TYPE(seq_range) *uids_arr, + bool do_shifts) +{ + const struct seq_range *shifts; + struct seq_range *uids, shift; + unsigned int i, uid_idx, uid_count, shift_count; + uint32_t child_shift_seq1, child_shift_count, seq_high; + unsigned int shift_sum = 0, child_sum = 0; + + /* self shift */ + uids = array_get_modifiable(uids_arr, &uid_count); + shifts = array_get(shifts_arr, &shift_count); + for (i = 0, uid_idx = 0, seq_high = 0;; ) { + /* skip UID ranges until we skip/overlap shifts */ + while (uid_idx < uid_count && + (i == shift_count || + I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2)) + { + i_assert(uids[uid_idx].seq1 >= shift_sum); + uids[uid_idx].seq1 -= shift_sum; + uids[uid_idx].seq2 -= shift_sum; + child_sum += uids[uid_idx].seq2 - + uids[uid_idx].seq1 + 1; + + if (uid_idx > 0 && + uids[uid_idx-1].seq2 == uids[uid_idx].seq1 - 1) { + /* we can merge this and the previous range */ + uids[uid_idx-1].seq2 = uids[uid_idx].seq2; + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else { + uid_idx++; + } + } + if (uid_idx == uid_count) + break; + + shift.seq1 = I_MAX(shifts[i].seq1, seq_high); + shift.seq2 = shifts[i].seq2; + if (shift.seq2 < uids[uid_idx].seq1) { + /* shift is entirely before UID range */ + shift_sum += shift.seq2 - shift.seq1 + 1; + i++; + } else { + /* handle shifts before UID range */ + if (shift.seq1 < uids[uid_idx].seq1) { + shift_sum += uids[uid_idx].seq1 - shift.seq1; + shift.seq1 = uids[uid_idx].seq1; + } + /* update child shifts */ + child_shift_seq1 = child_sum + + shift.seq1 - uids[uid_idx].seq1; + child_shift_count = + I_MIN(shift.seq2, uids[uid_idx].seq2) - + shift.seq1 + 1; + seq_range_array_add_range(child_shifts, + child_shift_seq1, + child_shift_seq1 + + child_shift_count - 1); + child_sum += child_shift_count; + + /* if the shifts continue after the UID range, + treat it in the next loop iteration */ + if (shift.seq2 <= uids[uid_idx].seq2) + i++; + else + seq_high = uids[uid_idx].seq2 + 1; + + /* update UIDs - no matter where within the UID range + the shifts happened, the result is the same: + shift number of UIDs are removed, and the rest + are decreased by shift_sum. + + 123 uids child_shifts + a) s -> 12 1 + b) s -> 12 2 + c) s -> 12 3 + */ + if (uids[uid_idx].seq1 + + child_shift_count > uids[uid_idx].seq2) { + /* removed completely */ + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else { + /* the next loop iteration fixes the UIDs */ + uids[uid_idx].seq1 += child_shift_count; + } + shift_sum += child_shift_count; + } + if (!do_shifts) { + /* root node - UIDs are only removed, not shifted */ + shift_sum = 0; + } + } +} + +static int +squat_trie_expunge_uidlists(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter, + const ARRAY_TYPE(seq_range) *expunged_uids) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) uid_range, shifts; + bool shift = FALSE; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uid_range, 1024); + i_array_init(&shifts, array_count(expunged_uids)); + array_append_array(&shifts, expunged_uids); + + while (node != NULL) { + i_assert(node->uid_list_idx != 0); + array_clear(&uid_range); + if (squat_uidlist_get_seqrange(ctx->trie->uidlist, + node->uid_list_idx, + &uid_range) < 0) { + ret = -1; + break; + } + squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts, + &uid_range, shift); + node->uid_list_idx = + squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range); + if (node->uid_list_idx == 0) { + node->child_count = 0; + node->next_uid = 0; + } + node = squat_trie_iterate_next(iter, &shifts); + i_assert(array_count(&shifts) > 0); + shift = TRUE; + } + array_free(&uid_range); + return ret; +} + +static int +squat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; + ARRAY_TYPE(uint32_t) uids; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uids, 1024); + while (node != NULL) { + i_assert(node->uid_list_idx != 0); + if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) { + /* rebuild the uidlist */ + array_clear(&uids); + if (squat_uidlist_get(ctx->trie->uidlist, + node->uid_list_idx, &uids) < 0) { + ret = -1; + break; + } + node->uid_list_idx = + squat_uidlist_rebuild_next(rebuild_ctx, &uids); + } + node = squat_trie_iterate_next(iter, &shifts); + } + array_free(&uids); + return ret; +} + static int squat_trie_renumber_uidlists(struct squat_trie_build_context *ctx, + const ARRAY_TYPE(seq_range) *expunged_uids, bool compress) { struct squat_trie_iterate_context *iter; - struct squat_node *node; struct squat_uidlist_rebuild_context *rebuild_ctx; - ARRAY_TYPE(uint32_t) uids; - uint32_t new_uid_list_idx, max_count=0; time_t now; int ret = 0; @@ -1076,27 +1281,14 @@ ctx->trie->hdr.indexid = I_MAX((unsigned int)now, ctx->trie->hdr.indexid + 1); - i_array_init(&uids, 1024); iter = squat_trie_iterate_init(ctx->trie); - node = squat_trie_iterate_first(iter); - new_uid_list_idx = 0x100; - while (node != NULL) { - if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) { - array_clear(&uids); - if (squat_uidlist_get(ctx->trie->uidlist, - node->uid_list_idx, &uids) < 0) { - ret = -1; - break; - } - max_count = I_MAX(max_count, array_count(&uids)); - squat_uidlist_rebuild_next(rebuild_ctx, &uids); - node->uid_list_idx = new_uid_list_idx << 1; - new_uid_list_idx++; - } - node = squat_trie_iterate_next(iter); + if (expunged_uids != NULL) { + ret = squat_trie_expunge_uidlists(ctx, rebuild_ctx, iter, + expunged_uids); + } else { + ret = squat_trie_renumber_uidlists2(ctx, rebuild_ctx, iter); } squat_trie_iterate_deinit(iter); - array_free(&uids); /* lock the trie before we rename uidlist */ if (squat_trie_lock(ctx->trie, F_WRLCK, &ctx->file_lock) <= 0) @@ -1369,7 +1561,8 @@ return ret; } -int squat_trie_build_deinit(struct squat_trie_build_context **_ctx) +int squat_trie_build_deinit(struct squat_trie_build_context **_ctx, + const ARRAY_TYPE(seq_range) *expunged_uids) { struct squat_trie_build_context *ctx = *_ctx; bool compress; @@ -1383,7 +1576,7 @@ being renamed, so that while trie is read locked, uidlist can't change under. */ squat_uidlist_build_flush(ctx->uidlist_build_ctx); - ret = squat_trie_renumber_uidlists(ctx, compress); + ret = squat_trie_renumber_uidlists(ctx, expunged_uids, compress); if (ret == 0) ret = squat_trie_write(ctx);
--- a/src/plugins/fts-squat/squat-trie.h Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-trie.h Sun Feb 03 22:44:09 2008 +0200 @@ -20,11 +20,14 @@ int squat_trie_build_init(struct squat_trie *trie, uint32_t *last_uid_r, struct squat_trie_build_context **ctx_r); -/* headers must be added before bodies */ +/* bodies must be added before headers */ int squat_trie_build_more(struct squat_trie_build_context *ctx, uint32_t uid, enum squat_index_type type, const unsigned char *data, unsigned int size); -int squat_trie_build_deinit(struct squat_trie_build_context **ctx); +/* if expunged_uids is non-NULL, they may be removed from the index if they + still exist. */ +int squat_trie_build_deinit(struct squat_trie_build_context **ctx, + const ARRAY_TYPE(seq_range) *expunged_uids); int squat_trie_get_last_uid(struct squat_trie *trie, uint32_t *last_uid_r); /* type specifies if we're looking at header, body or both */
--- a/src/plugins/fts-squat/squat-uidlist.c Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-uidlist.c Sun Feb 03 22:44:09 2008 +0200 @@ -82,6 +82,7 @@ uoff_t cur_block_start_offset; uint32_t list_sizes[UIDLIST_BLOCK_LIST_COUNT]; + uint32_t next_uid_list_idx; unsigned int list_idx; unsigned int new_count; }; @@ -845,6 +846,7 @@ ctx->build_ctx = build_ctx; ctx->fd = fd; ctx->output = o_stream_create_fd(ctx->fd, 0, FALSE); + ctx->next_uid_list_idx = 0x100; o_stream_cork(ctx->output); memset(&hdr, 0, sizeof(hdr)); @@ -888,8 +890,8 @@ ctx->cur_block_start_offset = ctx->output->offset; } -void squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, - const ARRAY_TYPE(uint32_t) *uids) +uint32_t squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(uint32_t) *uids) { int ret; @@ -905,6 +907,50 @@ uidlist_rebuild_flush_block(ctx); ctx->list_idx = 0; } + return ctx->next_uid_list_idx++ << 1; +} + +uint32_t squat_uidlist_rebuild_nextu(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(seq_range) *uids) +{ + const struct seq_range *range; + ARRAY_TYPE(uint32_t) tmp_uids; + uint32_t seq, uid1, ret; + unsigned int i, count; + + range = array_get(uids, &count); + if (count == 0) + return 0; + + if (count == 1 && range[0].seq1 == range[0].seq2) { + /* single UID */ + return (range[0].seq1 << 1) | 1; + } + if (range[count-1].seq2 < 8) { + /* we can use a singleton bitmask */ + ret = 0; + for (i = 0; i < count; i++) { + for (seq = range[i].seq1; seq <= range[i].seq2; seq++) + ret |= 1 << (seq+1); + } + return ret; + } + + /* convert seq range to our internal representation and use the + normal _rebuild_next() to write it */ + i_array_init(&tmp_uids, 128); + for (i = 0; i < count; i++) { + if (range[i].seq1 == range[i].seq2) + array_append(&tmp_uids, &range[i].seq1, 1); + else { + uid1 = range[i].seq1 | UID_LIST_MASK_RANGE; + array_append(&tmp_uids, &uid1, 1); + array_append(&tmp_uids, &range[i].seq2, 1); + } + } + ret = squat_uidlist_rebuild_next(ctx, &tmp_uids); + array_free(&tmp_uids); + return ret; } int squat_uidlist_rebuild_finish(struct squat_uidlist_rebuild_context *ctx,
--- a/src/plugins/fts-squat/squat-uidlist.h Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-uidlist.h Sun Feb 03 22:44:09 2008 +0200 @@ -47,8 +47,10 @@ int squat_uidlist_rebuild_init(struct squat_uidlist_build_context *build_ctx, bool compress, struct squat_uidlist_rebuild_context **ctx_r); -void squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, - const ARRAY_TYPE(uint32_t) *uids); +uint32_t squat_uidlist_rebuild_next(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(uint32_t) *uids); +uint32_t squat_uidlist_rebuild_nextu(struct squat_uidlist_rebuild_context *ctx, + const ARRAY_TYPE(seq_range) *uids); int squat_uidlist_rebuild_finish(struct squat_uidlist_rebuild_context *ctx, bool cancel);