view src/doveadm/doveadm-mail-deduplicate.c @ 19552:0f22db71df7a

global: freshen copyright git ls-files | xargs perl -p -i -e 's/(\d+)-201[0-5]/$1-2016/g;s/ (201[0-5]) Dovecot/ $1-2016 Dovecot/'
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Wed, 13 Jan 2016 12:24:03 +0200
parents 072170010eac
children d7b6a783eee2
line wrap: on
line source

/* Copyright (c) 2013-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "hash.h"
#include "mail-storage.h"
#include "mail-search-build.h"
#include "doveadm-mailbox-list-iter.h"
#include "doveadm-mail-iter.h"
#include "doveadm-mail.h"

struct uidlist {
	struct uidlist *next;
	uint32_t uid;
};

struct deduplicate_cmd_context {
	struct doveadm_mail_cmd_context ctx;
	bool by_msgid;
};

static int cmd_deduplicate_uidlist(struct doveadm_mail_cmd_context *_ctx,
				   struct mailbox *box, struct uidlist *uidlist)
{
	struct mailbox_transaction_context *trans;
	struct mail_search_context *search_ctx;
	struct mail_search_args *search_args;
	struct mail_search_arg *arg;
	struct mail *mail;
	ARRAY_TYPE(seq_range) uids;
	int ret = 0;

	/* the uidlist is reversed with oldest mails at the end.
	   we'll delete everything but the oldest mail. */
	if (uidlist->next == NULL)
		return 0;

	t_array_init(&uids, 8);
	for (; uidlist->next != NULL; uidlist = uidlist->next)
		seq_range_array_add(&uids, uidlist->uid);

	search_args = mail_search_build_init();
	arg = mail_search_build_add(search_args, SEARCH_UIDSET);
	arg->value.seqset = uids;

	trans = mailbox_transaction_begin(box, 0);
	search_ctx = mailbox_search_init(trans, search_args, NULL, 0, NULL);
	mail_search_args_unref(&search_args);

	while (mailbox_search_next(search_ctx, &mail))
		mail_expunge(mail);
	if (mailbox_search_deinit(&search_ctx) < 0) {
		i_error("Searching mailbox '%s' failed: %s",
			mailbox_get_vname(box),
			mailbox_get_last_error(box, NULL));
		doveadm_mail_failed_mailbox(_ctx, box);
		ret = -1;
	}
	if (mailbox_transaction_commit(&trans) < 0) {
		i_error("Committing mailbox '%s' transaction failed: %s",
			mailbox_get_vname(box),
			mailbox_get_last_error(box, NULL));
		doveadm_mail_failed_mailbox(_ctx, box);
		ret = -1;
	}
	return ret;
}

static int
cmd_deduplicate_box(struct doveadm_mail_cmd_context *_ctx,
		    const struct mailbox_info *info,
		    struct mail_search_args *search_args)
{
	struct deduplicate_cmd_context *ctx =
		(struct deduplicate_cmd_context *)_ctx;
	struct doveadm_mail_iter *iter;
	struct mailbox *box;
	struct mail *mail;
	enum mail_error error;
	pool_t pool;
	HASH_TABLE(const char *, struct uidlist *) hash;
	const char *key, *errstr;
	struct uidlist *value;
	int ret = 0;

	if (doveadm_mail_iter_init(_ctx, info, search_args, 0, NULL,
				   &iter) < 0)
		return -1;

	pool = pool_alloconly_create("deduplicate", 10240);
	hash_table_create(&hash, pool, 0, str_hash, strcmp);
	while (doveadm_mail_iter_next(iter, &mail)) {
		if (ctx->by_msgid) {
			if (mail_get_first_header(mail, "Message-ID", &key) < 0) {
				errstr = mailbox_get_last_error(mail->box, &error);
				if (error == MAIL_ERROR_NOTFOUND)
					continue;
				i_error("Couldn't lookup Message-ID: for UID=%u: %s",
					mail->uid, errstr);
				doveadm_mail_failed_error(_ctx, error);
				ret = -1;
				break;
			}
		} else {
			if (mail_get_special(mail, MAIL_FETCH_GUID, &key) < 0) {
				errstr = mailbox_get_last_error(mail->box, &error);
				if (error == MAIL_ERROR_NOTFOUND)
					continue;
				i_error("Couldn't lookup GUID: for UID=%u: %s",
					mail->uid, errstr);
				doveadm_mail_failed_error(_ctx, error);
				ret = -1;
				break;
			}
		}
		if (key != NULL && *key != '\0') {
			value = p_new(pool, struct uidlist, 1);
			value->uid = mail->uid;
			value->next = hash_table_lookup(hash, key);

			if (value->next == NULL) {
				key = p_strdup(pool, key);
				hash_table_insert(hash, key, value);
			} else {
				hash_table_update(hash, key, value);
			}
		}
	}

	if (doveadm_mail_iter_deinit_keep_box(&iter, &box) < 0)
		ret = -1;

	if (ret == 0) {
		struct hash_iterate_context *iter;

		iter = hash_table_iterate_init(hash);
		while (hash_table_iterate(iter, hash, &key, &value)) {
			T_BEGIN {
				if (cmd_deduplicate_uidlist(_ctx, box, value) < 0)
					ret = -1;
			} T_END;
		}
		hash_table_iterate_deinit(&iter);
	}

	hash_table_destroy(&hash);
	pool_unref(&pool);

	if (mailbox_sync(box, 0) < 0) {
		i_error("Syncing mailbox '%s' failed: %s",
			mailbox_get_vname(box),
			mailbox_get_last_error(box, NULL));
		doveadm_mail_failed_mailbox(_ctx, box);
		ret = -1;
	}
	mailbox_free(&box);
	return ret;
}

static int
cmd_deduplicate_run(struct doveadm_mail_cmd_context *ctx, struct mail_user *user)
{
	const enum mailbox_list_iter_flags iter_flags =
		MAILBOX_LIST_ITER_NO_AUTO_BOXES |
		MAILBOX_LIST_ITER_RETURN_NO_FLAGS;
	struct doveadm_mailbox_list_iter *iter;
	const struct mailbox_info *info;
	int ret = 0;

	iter = doveadm_mailbox_list_iter_init(ctx, user, ctx->search_args,
					      iter_flags);
	while ((info = doveadm_mailbox_list_iter_next(iter)) != NULL) T_BEGIN {
		if (cmd_deduplicate_box(ctx, info, ctx->search_args) < 0)
			ret = -1;
	} T_END;
	if (doveadm_mailbox_list_iter_deinit(&iter) < 0)
		ret = -1;
	return ret;
}

static void cmd_deduplicate_init(struct doveadm_mail_cmd_context *ctx,
				 const char *const args[])
{
	if (args[0] == NULL)
		doveadm_mail_help_name("deduplicate");

	ctx->search_args = doveadm_mail_build_search_args(args);
}

static bool
cmd_deduplicate_parse_arg(struct doveadm_mail_cmd_context *_ctx, int c)
{
	struct deduplicate_cmd_context *ctx =
		(struct deduplicate_cmd_context *)_ctx;

	switch (c) {
	case 'm':
		ctx->by_msgid = TRUE;
		break;
	default:
		return FALSE;
	}
	return TRUE;
}

static struct doveadm_mail_cmd_context *cmd_deduplicate_alloc(void)
{
	struct deduplicate_cmd_context *ctx;

	ctx = doveadm_mail_cmd_alloc(struct deduplicate_cmd_context);
	ctx->ctx.getopt_args = "m";
	ctx->ctx.v.parse_arg = cmd_deduplicate_parse_arg;
	ctx->ctx.v.init = cmd_deduplicate_init;
	ctx->ctx.v.run = cmd_deduplicate_run;
	return &ctx->ctx;
}

struct doveadm_mail_cmd cmd_deduplicate = {
	cmd_deduplicate_alloc, "deduplicate", "[-m] <search query>"
};