Mercurial > dovecot > original-hg > dovecot-1.2
diff src/plugins/fts-squat/squat-trie.c @ 7210:f5f77a3ae203 HEAD
Initial code to support expunging from squat indexes, probably still buggy.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sun, 03 Feb 2008 22:44:09 +0200 |
parents | bb9304dee6d4 |
children | 43d2f70fb279 |
line wrap: on
line diff
--- a/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:42:46 2008 +0200 +++ b/src/plugins/fts-squat/squat-trie.c Sun Feb 03 22:44:09 2008 +0200 @@ -43,6 +43,7 @@ struct squat_trie_iterate_node { struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; unsigned int idx; }; @@ -1013,8 +1014,16 @@ static int squat_trie_iterate_deinit(struct squat_trie_iterate_context *ctx) { + struct squat_trie_iterate_node *nodes; + unsigned int i, count; int ret = ctx->failed ? -1 : 0; + if (array_is_created(&ctx->cur.shifts)) { + nodes = array_get_modifiable(&ctx->parents, &count); + for (i = 0; i < count; i++) + array_free(&nodes[i].shifts); + array_free(&ctx->cur.shifts); + } array_free(&ctx->parents); i_free(ctx); return ret; @@ -1033,37 +1042,233 @@ } static struct squat_node * -squat_trie_iterate_next(struct squat_trie_iterate_context *ctx) +squat_trie_iterate_next(struct squat_trie_iterate_context *ctx, + ARRAY_TYPE(seq_range) *shifts_r) { struct squat_trie_iterate_node *iter_nodes; struct squat_node *children; - unsigned int count; + unsigned int count, shift_count = 0; - while (ctx->cur.idx == ctx->cur.node->child_count) { + while (ctx->cur.idx == ctx->cur.node->child_count || + ctx->cur.node->uid_list_idx == 0) + { iter_nodes = array_get_modifiable(&ctx->parents, &count); if (count == 0) return NULL; + + if (array_is_created(&ctx->cur.shifts)) + array_free(&ctx->cur.shifts); ctx->cur = iter_nodes[count-1]; array_delete(&ctx->parents, count-1, 1); } - ctx->cur.idx++; + *shifts_r = ctx->cur.shifts; + if (array_is_created(&ctx->cur.shifts)) { + shift_count = array_count(&ctx->cur.shifts); + i_assert(shift_count > 0); + } + + children = NODE_CHILDREN_NODES(ctx->cur.node); + while (children[ctx->cur.idx++].uid_list_idx == 0) { + if (ctx->cur.idx == ctx->cur.node->child_count) { + /* no more non-empty children in this node */ + return squat_trie_iterate_next(ctx, shifts_r); + } + } array_append(&ctx->parents, &ctx->cur, 1); - children = NODE_CHILDREN_NODES(ctx->cur.node); ctx->cur.node = &children[ctx->cur.idx-1]; ctx->cur.idx = 0; + if (shift_count != 0) + i_array_init(&ctx->cur.shifts, shift_count); return ctx->cur.node; } +static void +squat_uidlist_update_expunged_uids(const ARRAY_TYPE(seq_range) *shifts_arr, + ARRAY_TYPE(seq_range) *child_shifts, + ARRAY_TYPE(seq_range) *uids_arr, + bool do_shifts) +{ + const struct seq_range *shifts; + struct seq_range *uids, shift; + unsigned int i, uid_idx, uid_count, shift_count; + uint32_t child_shift_seq1, child_shift_count, seq_high; + unsigned int shift_sum = 0, child_sum = 0; + + /* self shift */ + uids = array_get_modifiable(uids_arr, &uid_count); + shifts = array_get(shifts_arr, &shift_count); + for (i = 0, uid_idx = 0, seq_high = 0;; ) { + /* skip UID ranges until we skip/overlap shifts */ + while (uid_idx < uid_count && + (i == shift_count || + I_MAX(shifts[i].seq1, seq_high) > uids[uid_idx].seq2)) + { + i_assert(uids[uid_idx].seq1 >= shift_sum); + uids[uid_idx].seq1 -= shift_sum; + uids[uid_idx].seq2 -= shift_sum; + child_sum += uids[uid_idx].seq2 - + uids[uid_idx].seq1 + 1; + + if (uid_idx > 0 && + uids[uid_idx-1].seq2 == uids[uid_idx].seq1 - 1) { + /* we can merge this and the previous range */ + uids[uid_idx-1].seq2 = uids[uid_idx].seq2; + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else { + uid_idx++; + } + } + if (uid_idx == uid_count) + break; + + shift.seq1 = I_MAX(shifts[i].seq1, seq_high); + shift.seq2 = shifts[i].seq2; + if (shift.seq2 < uids[uid_idx].seq1) { + /* shift is entirely before UID range */ + shift_sum += shift.seq2 - shift.seq1 + 1; + i++; + } else { + /* handle shifts before UID range */ + if (shift.seq1 < uids[uid_idx].seq1) { + shift_sum += uids[uid_idx].seq1 - shift.seq1; + shift.seq1 = uids[uid_idx].seq1; + } + /* update child shifts */ + child_shift_seq1 = child_sum + + shift.seq1 - uids[uid_idx].seq1; + child_shift_count = + I_MIN(shift.seq2, uids[uid_idx].seq2) - + shift.seq1 + 1; + seq_range_array_add_range(child_shifts, + child_shift_seq1, + child_shift_seq1 + + child_shift_count - 1); + child_sum += child_shift_count; + + /* if the shifts continue after the UID range, + treat it in the next loop iteration */ + if (shift.seq2 <= uids[uid_idx].seq2) + i++; + else + seq_high = uids[uid_idx].seq2 + 1; + + /* update UIDs - no matter where within the UID range + the shifts happened, the result is the same: + shift number of UIDs are removed, and the rest + are decreased by shift_sum. + + 123 uids child_shifts + a) s -> 12 1 + b) s -> 12 2 + c) s -> 12 3 + */ + if (uids[uid_idx].seq1 + + child_shift_count > uids[uid_idx].seq2) { + /* removed completely */ + array_delete(uids_arr, uid_idx, 1); + uids = array_get_modifiable(uids_arr, + &uid_count); + } else { + /* the next loop iteration fixes the UIDs */ + uids[uid_idx].seq1 += child_shift_count; + } + shift_sum += child_shift_count; + } + if (!do_shifts) { + /* root node - UIDs are only removed, not shifted */ + shift_sum = 0; + } + } +} + +static int +squat_trie_expunge_uidlists(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter, + const ARRAY_TYPE(seq_range) *expunged_uids) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) uid_range, shifts; + bool shift = FALSE; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uid_range, 1024); + i_array_init(&shifts, array_count(expunged_uids)); + array_append_array(&shifts, expunged_uids); + + while (node != NULL) { + i_assert(node->uid_list_idx != 0); + array_clear(&uid_range); + if (squat_uidlist_get_seqrange(ctx->trie->uidlist, + node->uid_list_idx, + &uid_range) < 0) { + ret = -1; + break; + } + squat_uidlist_update_expunged_uids(&shifts, &iter->cur.shifts, + &uid_range, shift); + node->uid_list_idx = + squat_uidlist_rebuild_nextu(rebuild_ctx, &uid_range); + if (node->uid_list_idx == 0) { + node->child_count = 0; + node->next_uid = 0; + } + node = squat_trie_iterate_next(iter, &shifts); + i_assert(array_count(&shifts) > 0); + shift = TRUE; + } + array_free(&uid_range); + return ret; +} + +static int +squat_trie_renumber_uidlists2(struct squat_trie_build_context *ctx, + struct squat_uidlist_rebuild_context *rebuild_ctx, + struct squat_trie_iterate_context *iter) +{ + struct squat_node *node; + ARRAY_TYPE(seq_range) shifts; + ARRAY_TYPE(uint32_t) uids; + int ret = 0; + + node = squat_trie_iterate_first(iter); + if (node->uid_list_idx == 0) + return 0; + + i_array_init(&uids, 1024); + while (node != NULL) { + i_assert(node->uid_list_idx != 0); + if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) { + /* rebuild the uidlist */ + array_clear(&uids); + if (squat_uidlist_get(ctx->trie->uidlist, + node->uid_list_idx, &uids) < 0) { + ret = -1; + break; + } + node->uid_list_idx = + squat_uidlist_rebuild_next(rebuild_ctx, &uids); + } + node = squat_trie_iterate_next(iter, &shifts); + } + array_free(&uids); + return ret; +} + static int squat_trie_renumber_uidlists(struct squat_trie_build_context *ctx, + const ARRAY_TYPE(seq_range) *expunged_uids, bool compress) { struct squat_trie_iterate_context *iter; - struct squat_node *node; struct squat_uidlist_rebuild_context *rebuild_ctx; - ARRAY_TYPE(uint32_t) uids; - uint32_t new_uid_list_idx, max_count=0; time_t now; int ret = 0; @@ -1076,27 +1281,14 @@ ctx->trie->hdr.indexid = I_MAX((unsigned int)now, ctx->trie->hdr.indexid + 1); - i_array_init(&uids, 1024); iter = squat_trie_iterate_init(ctx->trie); - node = squat_trie_iterate_first(iter); - new_uid_list_idx = 0x100; - while (node != NULL) { - if (!UIDLIST_IS_SINGLETON(node->uid_list_idx)) { - array_clear(&uids); - if (squat_uidlist_get(ctx->trie->uidlist, - node->uid_list_idx, &uids) < 0) { - ret = -1; - break; - } - max_count = I_MAX(max_count, array_count(&uids)); - squat_uidlist_rebuild_next(rebuild_ctx, &uids); - node->uid_list_idx = new_uid_list_idx << 1; - new_uid_list_idx++; - } - node = squat_trie_iterate_next(iter); + if (expunged_uids != NULL) { + ret = squat_trie_expunge_uidlists(ctx, rebuild_ctx, iter, + expunged_uids); + } else { + ret = squat_trie_renumber_uidlists2(ctx, rebuild_ctx, iter); } squat_trie_iterate_deinit(iter); - array_free(&uids); /* lock the trie before we rename uidlist */ if (squat_trie_lock(ctx->trie, F_WRLCK, &ctx->file_lock) <= 0) @@ -1369,7 +1561,8 @@ return ret; } -int squat_trie_build_deinit(struct squat_trie_build_context **_ctx) +int squat_trie_build_deinit(struct squat_trie_build_context **_ctx, + const ARRAY_TYPE(seq_range) *expunged_uids) { struct squat_trie_build_context *ctx = *_ctx; bool compress; @@ -1383,7 +1576,7 @@ being renamed, so that while trie is read locked, uidlist can't change under. */ squat_uidlist_build_flush(ctx->uidlist_build_ctx); - ret = squat_trie_renumber_uidlists(ctx, compress); + ret = squat_trie_renumber_uidlists(ctx, expunged_uids, compress); if (ret == 0) ret = squat_trie_write(ctx);