view src/plugins/fts/fts-api.c @ 20272:3d60b96543fd

virtual: Moved virtual_mailbox_vfuncs to lib-storage. This allows implementing other virtual storage backends.
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Mon, 30 May 2016 19:07:16 +0300
parents 526084651206
children 105e82862423
line wrap: on
line source

/* Copyright (c) 2006-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "hex-binary.h"
#include "mail-index.h"
#include "mail-namespace.h"
#include "mail-storage-private.h"
#include "mailbox-list-iter.h"
#include "mail-search.h"
#include "fts-api-private.h"

static ARRAY(const struct fts_backend *) backends;

void fts_backend_register(const struct fts_backend *backend)
{
	if (!array_is_created(&backends))
		i_array_init(&backends, 4);
	array_append(&backends, &backend, 1);
}

void fts_backend_unregister(const char *name)
{
	const struct fts_backend *const *be;
	unsigned int i, count;

	be = array_get(&backends, &count);
	for (i = 0; i < count; i++) {
		if (strcmp(be[i]->name, name) == 0) {
			array_delete(&backends, i, 1);
			break;
		}
	}
	if (i == count)
		i_panic("fts_backend_unregister(%s): unknown backend", name);

	if (count == 1)
		array_free(&backends);
}

static const struct fts_backend *
fts_backend_class_lookup(const char *backend_name)
{
	const struct fts_backend *const *be;
	unsigned int i, count;

	if (array_is_created(&backends)) {
		be = array_get(&backends, &count);
		for (i = 0; i < count; i++) {
			if (strcmp(be[i]->name, backend_name) == 0)
				return be[i];
		}
	}
	return NULL;
}

int fts_backend_init(const char *backend_name, struct mail_namespace *ns,
		     const char **error_r, struct fts_backend **backend_r)
{
	const struct fts_backend *be;
	struct fts_backend *backend;

	be = fts_backend_class_lookup(backend_name);
	if (be == NULL) {
		*error_r = "Unknown backend";
		return -1;
	}

	backend = be->v.alloc();
	backend->ns = ns;
	if (backend->v.init(backend, error_r) < 0) {
		i_free(backend);
		return -1;
	}
	*backend_r = backend;
	return 0;
}

void fts_backend_deinit(struct fts_backend **_backend)
{
	struct fts_backend *backend = *_backend;

	*_backend = NULL;
	backend->v.deinit(backend);
}

int fts_backend_get_last_uid(struct fts_backend *backend, struct mailbox *box,
			     uint32_t *last_uid_r)
{
	struct fts_index_header hdr;

	if (box->virtual_vfuncs != NULL) {
		/* virtual mailboxes themselves don't have any indexes,
		   so catch this call here */
		if (!fts_index_get_header(box, &hdr))
			*last_uid_r = 0;
		else
			*last_uid_r = hdr.last_indexed_uid;
		return 0;
	}

	return backend->v.get_last_uid(backend, box, last_uid_r);
}

bool fts_backend_is_updating(struct fts_backend *backend)
{
	return backend->updating;
}

struct fts_backend_update_context *
fts_backend_update_init(struct fts_backend *backend)
{
	struct fts_backend_update_context *ctx;

	i_assert(!backend->updating);

	backend->updating = TRUE;
	ctx = backend->v.update_init(backend);
	if ((backend->flags & FTS_BACKEND_FLAG_NORMALIZE_INPUT) != 0)
		ctx->normalizer = backend->ns->user->default_normalizer;
	return ctx;
}

static void fts_backend_set_cur_mailbox(struct fts_backend_update_context *ctx)
{
	fts_backend_update_unset_build_key(ctx);
	if (ctx->backend_box != ctx->cur_box) {
		ctx->backend->v.update_set_mailbox(ctx, ctx->cur_box);
		ctx->backend_box = ctx->cur_box;
	}
}

int fts_backend_update_deinit(struct fts_backend_update_context **_ctx)
{
	struct fts_backend_update_context *ctx = *_ctx;
	struct fts_backend *backend = ctx->backend;
	int ret;

	*_ctx = NULL;

	ctx->cur_box = NULL;
	fts_backend_set_cur_mailbox(ctx);

	ret = backend->v.update_deinit(ctx);
	backend->updating = FALSE;
	return ret;
}

void fts_backend_update_set_mailbox(struct fts_backend_update_context *ctx,
				    struct mailbox *box)
{
	if (ctx->backend_box != NULL && box != ctx->backend_box) {
		/* make sure we don't reference the backend box anymore */
		ctx->backend->v.update_set_mailbox(ctx, NULL);
		ctx->backend_box = NULL;
	}
	ctx->cur_box = box;
}

void fts_backend_update_expunge(struct fts_backend_update_context *ctx,
				uint32_t uid)
{
	fts_backend_set_cur_mailbox(ctx);
	ctx->backend->v.update_expunge(ctx, uid);
}

bool fts_backend_update_set_build_key(struct fts_backend_update_context *ctx,
				      const struct fts_backend_build_key *key)
{
	fts_backend_set_cur_mailbox(ctx);

	i_assert(ctx->cur_box != NULL);

	if (!ctx->backend->v.update_set_build_key(ctx, key))
		return FALSE;
	ctx->build_key_open = TRUE;
	return TRUE;
}

void fts_backend_update_unset_build_key(struct fts_backend_update_context *ctx)
{
	if (ctx->build_key_open) {
		ctx->backend->v.update_unset_build_key(ctx);
		ctx->build_key_open = FALSE;
	}
}

int fts_backend_update_build_more(struct fts_backend_update_context *ctx,
				  const unsigned char *data, size_t size)
{
	i_assert(ctx->build_key_open);

	return ctx->backend->v.update_build_more(ctx, data, size);
}

int fts_backend_refresh(struct fts_backend *backend)
{
	return backend->v.refresh(backend);
}

int fts_backend_reset_last_uids(struct fts_backend *backend)
{
	struct mailbox_list_iterate_context *iter;
	const struct mailbox_info *info;
	struct mailbox *box;
	int ret = 0;

	iter = mailbox_list_iter_init(backend->ns->list, "*",
				      MAILBOX_LIST_ITER_SKIP_ALIASES |
				      MAILBOX_LIST_ITER_NO_AUTO_BOXES);
	while ((info = mailbox_list_iter_next(iter)) != NULL) {
		if ((info->flags &
		     (MAILBOX_NONEXISTENT | MAILBOX_NOSELECT)) != 0)
			continue;

		box = mailbox_alloc(info->ns->list, info->vname, 0);
		if (mailbox_open(box) == 0) {
			if (fts_index_set_last_uid(box, 0) < 0)
				ret = -1;
		}
		mailbox_free(&box);
	}
	if (mailbox_list_iter_deinit(&iter) < 0)
		ret = -1;
	return ret;
}

int fts_backend_rescan(struct fts_backend *backend)
{
	struct mailbox *box;
	bool virtual_storage;

	box = mailbox_alloc(backend->ns->list, "", 0);
	virtual_storage = box->virtual_vfuncs != NULL;
	mailbox_free(&box);

	if (virtual_storage) {
		/* just reset the last-uids for a virtual storage. */
		return fts_backend_reset_last_uids(backend);
	}

	return backend->v.rescan == NULL ? 0 :
		backend->v.rescan(backend);
}

int fts_backend_optimize(struct fts_backend *backend)
{
	return backend->v.optimize == NULL ? 0 :
		backend->v.optimize(backend);
}

static void
fts_merge_maybies(ARRAY_TYPE(seq_range) *dest_maybe,
		  const ARRAY_TYPE(seq_range) *dest_definite,
		  const ARRAY_TYPE(seq_range) *src_maybe,
		  const ARRAY_TYPE(seq_range) *src_definite)
{
	ARRAY_TYPE(seq_range) src_unwanted;
	const struct seq_range *range;
	struct seq_range new_range;
	unsigned int i, count;
	uint32_t seq;

	/* add/leave to dest_maybe if at least one list has maybe,
	   and no lists have none */

	/* create unwanted sequences list from both sources */
	t_array_init(&src_unwanted, 128);
	new_range.seq1 = 0; new_range.seq2 = (uint32_t)-1;
	array_append(&src_unwanted, &new_range, 1);
	seq_range_array_remove_seq_range(&src_unwanted, src_maybe);
	seq_range_array_remove_seq_range(&src_unwanted, src_definite);

	/* drop unwanted uids */
	seq_range_array_remove_seq_range(dest_maybe, &src_unwanted);

	/* add uids that are in dest_definite and src_maybe lists */
	range = array_get(dest_definite, &count);
	for (i = 0; i < count; i++) {
		for (seq = range[i].seq1; seq <= range[i].seq2; seq++) {
			if (seq_range_exists(src_maybe, seq))
				seq_range_array_add(dest_maybe, seq);
		}
	}
}

void fts_filter_uids(ARRAY_TYPE(seq_range) *definite_dest,
		     const ARRAY_TYPE(seq_range) *definite_filter,
		     ARRAY_TYPE(seq_range) *maybe_dest,
		     const ARRAY_TYPE(seq_range) *maybe_filter)
{
	T_BEGIN {
		fts_merge_maybies(maybe_dest, definite_dest,
				  maybe_filter, definite_filter);
	} T_END;
	/* keep only what exists in both lists. the rest is in
	   maybies or not wanted */
	seq_range_array_intersect(definite_dest, definite_filter);
}

bool fts_backend_default_can_lookup(struct fts_backend *backend,
				    const struct mail_search_arg *args)
{
	for (; args != NULL; args = args->next) {
		switch (args->type) {
		case SEARCH_OR:
		case SEARCH_SUB:
		case SEARCH_INTHREAD:
			if (fts_backend_default_can_lookup(backend,
							   args->value.subargs))
				return TRUE;
			break;
		case SEARCH_HEADER:
		case SEARCH_HEADER_ADDRESS:
		case SEARCH_HEADER_COMPRESS_LWSP:
		case SEARCH_BODY:
		case SEARCH_TEXT:
			return TRUE;
		default:
			break;
		}
	}
	return FALSE;
}

bool fts_backend_can_lookup(struct fts_backend *backend,
			    const struct mail_search_arg *args)
{
	return backend->v.can_lookup(backend, args);
}

static int fts_score_map_sort(const struct fts_score_map *m1,
			      const struct fts_score_map *m2)
{
	if (m1->uid < m2->uid)
		return -1;
	if (m1->uid > m2->uid)
		return 1;
	return 0;
}

int fts_backend_lookup(struct fts_backend *backend, struct mailbox *box,
		       struct mail_search_arg *args,
		       enum fts_lookup_flags flags,
		       struct fts_result *result)
{
	array_clear(&result->definite_uids);
	array_clear(&result->maybe_uids);
	array_clear(&result->scores);

	if (backend->v.lookup(backend, box, args, flags, result) < 0)
		return -1;

	if (!result->scores_sorted && array_is_created(&result->scores)) {
		array_sort(&result->scores, fts_score_map_sort);
		result->scores_sorted = TRUE;
	}
	return 0;
}

int fts_backend_lookup_multi(struct fts_backend *backend,
			     struct mailbox *const boxes[],
			     struct mail_search_arg *args,
			     enum fts_lookup_flags flags,
			     struct fts_multi_result *result)
{
	unsigned int i;

	i_assert(boxes[0] != NULL);

	if (backend->v.lookup_multi != NULL) {
		if (backend->v.lookup_multi(backend, boxes, args,
					    flags, result) < 0)
			return -1;
		if (result->box_results == NULL) {
			result->box_results = p_new(result->pool,
						    struct fts_result, 1);
		}
		return 0;
	}

	for (i = 0; boxes[i] != NULL; i++) ;
	result->box_results = p_new(result->pool, struct fts_result, i+1);

	for (i = 0; boxes[i] != NULL; i++) {
		struct fts_result *box_result = &result->box_results[i];

		p_array_init(&box_result->definite_uids, result->pool, 32);
		p_array_init(&box_result->maybe_uids, result->pool, 32);
		p_array_init(&box_result->scores, result->pool, 32);
		if (backend->v.lookup(backend, boxes[i], args,
				      flags, box_result) < 0)
			return -1;
	}
	return 0;
}

void fts_backend_lookup_done(struct fts_backend *backend)
{
	if (backend->v.lookup_done != NULL)
		backend->v.lookup_done(backend);
}

static uint32_t fts_index_get_ext_id(struct mailbox *box)
{
	return mail_index_ext_register(box->index, "fts",
				       sizeof(struct fts_index_header),
				       0, 0);
}

bool fts_index_get_header(struct mailbox *box, struct fts_index_header *hdr_r)
{
	struct mail_index_view *view;
	const void *data;
	size_t data_size;
	bool ret;

	mail_index_refresh(box->index);
	view = mail_index_view_open(box->index);
	mail_index_get_header_ext(view, fts_index_get_ext_id(box),
				  &data, &data_size);
	if (data_size < sizeof(*hdr_r)) {
		memset(hdr_r, 0, sizeof(*hdr_r));
		ret = FALSE;
	} else {
		memcpy(hdr_r, data, data_size);
		ret = TRUE;
	}
	mail_index_view_close(&view);
	return ret;
}

int fts_index_set_header(struct mailbox *box,
			 const struct fts_index_header *hdr)
{
	struct mail_index_transaction *trans;
	uint32_t ext_id = fts_index_get_ext_id(box);

	trans = mail_index_transaction_begin(box->view, 0);
	mail_index_update_header_ext(trans, ext_id, 0, hdr, sizeof(*hdr));
	return mail_index_transaction_commit(&trans);
}

int fts_index_set_last_uid(struct mailbox *box, uint32_t last_uid)
{
	struct fts_index_header hdr;

	(void)fts_index_get_header(box, &hdr);
	hdr.last_indexed_uid = last_uid;
	return fts_index_set_header(box, &hdr);
}

int fts_index_have_compatible_settings(struct mailbox_list *list,
				       uint32_t checksum)
{
	struct mail_namespace *ns = mailbox_list_get_namespace(list);
	struct mailbox *box;
	struct fts_index_header hdr;
	const char *vname;
	unsigned int len;
	int ret;

	if ((ns->flags & NAMESPACE_FLAG_INBOX_USER) != 0)
		vname = "INBOX";
	else {
		len = strlen(ns->prefix);
		if (len > 0 && ns->prefix[len-1] == mail_namespace_get_sep(ns))
			len--;
		vname = t_strndup(ns->prefix, len);
	}

	box = mailbox_alloc(list, vname, 0);
	if (mailbox_sync(box, (enum mailbox_sync_flags)0) < 0) {
		i_error("lucene: Failed to sync mailbox INBOX: %s",
			mailbox_get_last_error(box, NULL));
		ret = -1;
	} else {
		ret = fts_index_get_header(box, &hdr) &&
			hdr.settings_checksum == checksum ? 1 : 0;
	}
	mailbox_free(&box);
	return ret;
}

static const char *indexed_headers[] = {
	"From", "To", "Cc", "Bcc", "Subject"
};

bool fts_header_want_indexed(const char *hdr_name)
{
	unsigned int i;

	for (i = 0; i < N_ELEMENTS(indexed_headers); i++) {
		if (strcasecmp(hdr_name, indexed_headers[i]) == 0)
			return TRUE;
	}
	return FALSE;
}

bool fts_header_has_language(const char *hdr_name)
{
	/* FIXME: should email address headers be detected as different
	   languages? That mainly contains people's names.. */
	/*if (message_header_is_address(hdr_name))
		return TRUE;*/

	/* Subject definitely contains language-specific data that can be
	   detected. Comment and Keywords headers also could contain, although
	   just about nobody uses those headers.

	   For now we assume that other headers contain non-language specific
	   data that we don't want to filter in special ways. For example
	   it is good to be able to search for Message-IDs. */
	return strcasecmp(hdr_name, "Subject") == 0 ||
		strcasecmp(hdr_name, "Comments") == 0 ||
		strcasecmp(hdr_name, "Keywords") == 0;
}

int fts_mailbox_get_guid(struct mailbox *box, const char **guid_r)
{
	struct mailbox_metadata metadata;

	if (mailbox_get_metadata(box, MAILBOX_METADATA_GUID, &metadata) < 0)
		return -1;

	*guid_r = guid_128_to_string(metadata.guid);
	return 0;
}