Mercurial > dovecot > core-2.2
changeset 16535:3683d7bff095
doveadm: Added deduplicate command.
By default it deduplicates only by GUIDs. With -m parameter it deduplicates
by Message-Id: header.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 18 Jun 2013 17:05:20 +0300 |
parents | 5e51c5545029 |
children | 07642120b6ea |
files | src/doveadm/Makefile.am src/doveadm/doveadm-mail-deduplicate.c src/doveadm/doveadm-mail.c src/doveadm/doveadm-mail.h |
diffstat | 4 files changed, 206 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/src/doveadm/Makefile.am Tue Jun 18 15:14:42 2013 +0300 +++ b/src/doveadm/Makefile.am Tue Jun 18 17:05:20 2013 +0300 @@ -62,6 +62,7 @@ doveadm-mail.c \ doveadm-mail-altmove.c \ doveadm-mail-batch.c \ + doveadm-mail-deduplicate.c \ doveadm-mail-expunge.c \ doveadm-mail-fetch.c \ doveadm-mail-flags.c \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/doveadm/doveadm-mail-deduplicate.c Tue Jun 18 17:05:20 2013 +0300 @@ -0,0 +1,203 @@ +/* Copyright (c) 2013 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "hash.h" +#include "mail-storage.h" +#include "mail-search-build.h" +#include "doveadm-mailbox-list-iter.h" +#include "doveadm-mail-iter.h" +#include "doveadm-mail.h" + +struct uidlist { + struct uidlist *next; + uint32_t uid; +}; + +struct deduplicate_cmd_context { + struct doveadm_mail_cmd_context ctx; + bool by_msgid; +}; + +static int cmd_deduplicate_uidlist(struct mailbox *box, struct uidlist *uidlist) +{ + struct mailbox_transaction_context *trans; + struct mail_search_context *search_ctx; + struct mail_search_args *search_args; + struct mail_search_arg *arg; + struct mail *mail; + ARRAY_TYPE(seq_range) uids; + int ret = 0; + + /* the uidlist is reversed with oldest mails at the end. + we'll delete everything but the oldest mail. */ + if (uidlist->next == NULL) + return 0; + + t_array_init(&uids, 8); + for (; uidlist->next != NULL; uidlist = uidlist->next) + seq_range_array_add(&uids, uidlist->uid); + + search_args = mail_search_build_init(); + arg = mail_search_build_add(search_args, SEARCH_UIDSET); + arg->value.seqset = uids; + + trans = mailbox_transaction_begin(box, 0); + search_ctx = mailbox_search_init(trans, search_args, NULL, 0, NULL); + mail_search_args_unref(&search_args); + + while (mailbox_search_next(search_ctx, &mail)) + mail_expunge(mail); + if (mailbox_search_deinit(&search_ctx) < 0) + ret = -1; + if (mailbox_transaction_commit(&trans) < 0) + ret = -1; + return ret; +} + +static int +cmd_deduplicate_box(struct doveadm_mail_cmd_context *_ctx, + const struct mailbox_info *info, + struct mail_search_args *search_args) +{ + struct deduplicate_cmd_context *ctx = + (struct deduplicate_cmd_context *)_ctx; + struct doveadm_mail_iter *iter; + struct mailbox *box; + struct mail *mail; + enum mail_error error; + pool_t pool; + HASH_TABLE(const char *, struct uidlist *) hash; + const char *key, *errstr; + struct uidlist *value; + int ret = 0; + + if (doveadm_mail_iter_init(_ctx, info, search_args, 0, NULL, + &iter) < 0) + return -1; + + pool = pool_alloconly_create("deduplicate", 10240); + hash_table_create(&hash, pool, 0, str_hash, strcmp); + while (doveadm_mail_iter_next(iter, &mail)) { + if (ctx->by_msgid) { + if (mail_get_first_header(mail, "Message-ID", &key) < 0) { + errstr = mailbox_get_last_error(box, &error); + if (error == MAIL_ERROR_NOTFOUND) + continue; + i_error("Couldn't lookup Message-ID: for UID=%u: %s", + mail->uid, errstr); + ret = -1; + break; + } + } else { + if (mail_get_special(mail, MAIL_FETCH_GUID, &key) < 0) { + errstr = mailbox_get_last_error(box, &error); + if (error == MAIL_ERROR_NOTFOUND) + continue; + i_error("Couldn't lookup GUID: for UID=%u: %s", + mail->uid, errstr); + ret = -1; + break; + } + } + if (key != NULL && *key != '\0') { + value = p_new(pool, struct uidlist, 1); + value->uid = mail->uid; + value->next = hash_table_lookup(hash, key); + + if (value->next == NULL) { + key = p_strdup(pool, key); + hash_table_insert(hash, key, value); + } else { + hash_table_update(hash, key, value); + } + } + } + + if (doveadm_mail_iter_deinit_keep_box(&iter, &box) < 0) + ret = -1; + + if (ret == 0) { + struct hash_iterate_context *iter; + + iter = hash_table_iterate_init(hash); + while (hash_table_iterate(iter, hash, &key, &value)) { + T_BEGIN { + if (cmd_deduplicate_uidlist(box, value) < 0) + ret = -1; + } T_END; + } + hash_table_iterate_deinit(&iter); + } + + hash_table_destroy(&hash); + pool_unref(&pool); + + if (mailbox_sync(box, 0) < 0) { + doveadm_mail_failed_mailbox(_ctx, box); + ret = -1; + } + mailbox_free(&box); + return ret; +} + +static int +cmd_deduplicate_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user) +{ + const enum mailbox_list_iter_flags iter_flags = + MAILBOX_LIST_ITER_NO_AUTO_BOXES | + MAILBOX_LIST_ITER_RETURN_NO_FLAGS; + struct doveadm_mailbox_list_iter *iter; + const struct mailbox_info *info; + int ret = 0; + + iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args, + iter_flags); + while ((info = doveadm_mailbox_list_iter_next(iter)) != NULL) T_BEGIN { + if (cmd_deduplicate_box(ctx, info, ctx->search_args) < 0) + ret = -1; + } T_END; + if (doveadm_mailbox_list_iter_deinit(&iter) < 0) + ret = -1; + return ret; +} + +static void cmd_deduplicate_init(struct doveadm_mail_cmd_context *ctx, + const char *const args[]) +{ + if (args[0] == NULL) + doveadm_mail_help_name("deduplicate"); + + ctx->search_args = doveadm_mail_build_search_args(args); +} + +static bool +cmd_deduplicate_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c) +{ + struct deduplicate_cmd_context *ctx = + (struct deduplicate_cmd_context *)_ctx; + + switch (c) { + case 'm': + ctx->by_msgid = TRUE; + break; + default: + return FALSE; + } + return TRUE; +} + +static struct doveadm_mail_cmd_context *cmd_deduplicate_alloc(void) +{ + struct deduplicate_cmd_context *ctx; + + ctx = doveadm_mail_cmd_alloc(struct deduplicate_cmd_context); + ctx->ctx.getopt_args = "m"; + ctx->ctx.v.parse_arg = cmd_deduplicate_parse_arg; + ctx->ctx.v.init = cmd_deduplicate_init; + ctx->ctx.v.run = cmd_deduplicate_run; + return &ctx->ctx; +} + +struct doveadm_mail_cmd cmd_deduplicate = { + cmd_deduplicate_alloc, "deduplicate", "[-m] <search query>" +};
--- a/src/doveadm/doveadm-mail.c Tue Jun 18 15:14:42 2013 +0300 +++ b/src/doveadm/doveadm-mail.c Tue Jun 18 17:05:20 2013 +0300 @@ -699,6 +699,7 @@ &cmd_index, &cmd_altmove, &cmd_copy, + &cmd_deduplicate, &cmd_move, &cmd_mailbox_list, &cmd_mailbox_create,
--- a/src/doveadm/doveadm-mail.h Tue Jun 18 15:14:42 2013 +0300 +++ b/src/doveadm/doveadm-mail.h Tue Jun 18 17:05:20 2013 +0300 @@ -145,6 +145,7 @@ extern struct doveadm_mail_cmd cmd_index; extern struct doveadm_mail_cmd cmd_altmove; extern struct doveadm_mail_cmd cmd_copy; +extern struct doveadm_mail_cmd cmd_deduplicate; extern struct doveadm_mail_cmd cmd_move; extern struct doveadm_mail_cmd cmd_mailbox_list; extern struct doveadm_mail_cmd cmd_mailbox_create;