view src/plugins/fts/fts-search.c @ 21389:59437f8764c6

global: Replaced all instances of memset(p, 0, sizeof(*p)) with the new i_zero() macro. Used the following script: C_FILES=`git ls-files *.c` H_FILES=`git ls-files *.h` for F in "$C_FILES $H_FILES"; do echo "$F" perl -p -i -e 's/safe_memset\(&\(?([^,]*)\)?,\s*0,\s*sizeof\(\g1\)\)/i_zero_safe(&$1)/g' $F perl -p -i -e 's/safe_memset\(([^,]*),\s*0,\s*sizeof\(\*\g1\)\)/i_zero_safe($1)/g' $F perl -p -i -e 's/memset\(&\(?([^,]*)\)?,\s*0,\s*sizeof\(\g1\)\)/i_zero(&$1)/g' $F perl -p -i -e 's/memset\(([^,]*),\s*0,\s*sizeof\(\*\g1\)\)/i_zero($1)/g' $F done
author Stephan Bosch <stephan.bosch@dovecot.fi>
date Wed, 11 Jan 2017 01:57:46 +0100
parents 3d60b96543fd
children 2e2563132d5f
line wrap: on
line source

/* Copyright (c) 2006-2016 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "array.h"
#include "str.h"
#include "seq-range-array.h"
#include "mail-search.h"
#include "fts-api-private.h"
#include "fts-search-args.h"
#include "fts-search-serialize.h"
#include "fts-storage.h"

static void
uid_range_to_seqs(struct fts_search_context *fctx,
		  const ARRAY_TYPE(seq_range) *uid_range,
		  ARRAY_TYPE(seq_range) *seq_range)
{
	const struct seq_range *range;
	unsigned int i, count;
	uint32_t seq1, seq2;

	range = array_get(uid_range, &count);
	if (!array_is_created(seq_range))
		p_array_init(seq_range, fctx->result_pool, count);
	for (i = 0; i < count; i++) {
		if (range[i].seq1 > range[i].seq2)
			continue;
		mailbox_get_seq_range(fctx->box, range[i].seq1, range[i].seq2,
				      &seq1, &seq2);
		if (seq1 != 0)
			seq_range_array_add_range(seq_range, seq1, seq2);
	}
}

static int fts_search_lookup_level_single(struct fts_search_context *fctx,
					  struct mail_search_arg *args,
					  bool and_args)
{
	enum fts_lookup_flags flags = fctx->flags |
		(and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
	struct fts_search_level *level;
	struct fts_result result;

	i_zero(&result);
	p_array_init(&result.definite_uids, fctx->result_pool, 32);
	p_array_init(&result.maybe_uids, fctx->result_pool, 32);
	p_array_init(&result.scores, fctx->result_pool, 32);

	mail_search_args_reset(args, TRUE);
	if (fts_backend_lookup(fctx->backend, fctx->box, args, flags,
			       &result) < 0)
		return -1;

	level = array_append_space(&fctx->levels);
	level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
	fts_search_serialize(level->args_matches, args);

	uid_range_to_seqs(fctx, &result.definite_uids, &level->definite_seqs);
	uid_range_to_seqs(fctx, &result.maybe_uids, &level->maybe_seqs);
	level->score_map = result.scores;
	return 0;
}

static void
level_scores_add_vuids(struct mailbox *box,
		       struct fts_search_level *level, struct fts_result *br)
{
	const struct fts_score_map *scores;
	unsigned int i, count;
	ARRAY_TYPE(seq_range) backend_uids;
	ARRAY_TYPE(uint32_t) vuids_arr;
	const uint32_t *vuids;
	struct fts_score_map *score;

	scores = array_get(&br->scores, &count);
	t_array_init(&vuids_arr, count);
	t_array_init(&backend_uids, 64);
	for (i = 0; i < count; i++)
		seq_range_array_add(&backend_uids, scores[i].uid);
	box->virtual_vfuncs->get_virtual_uid_map(box, br->box,
						 &backend_uids, &vuids_arr);

	i_assert(array_count(&vuids_arr) == array_count(&br->scores));
	vuids = array_get(&vuids_arr, &count);
	for (i = 0; i < count; i++) {
		score = array_append_space(&level->score_map);
		score->uid = vuids[i];
		score->score = scores[i].score;
	}
}

static int
mailbox_cmp_fts_backend(struct mailbox *const *m1, struct mailbox *const *m2)
{
	struct fts_backend *b1, *b2;

	b1 = fts_mailbox_backend(*m1);
	b2 = fts_mailbox_backend(*m2);
	if (b1 < b2)
		return -1;
	if (b1 > b2)
		return 1;
	return 0;
}

static int
multi_add_lookup_result(struct fts_search_context *fctx,
			struct fts_search_level *level,
			struct mail_search_arg *args,
			struct fts_multi_result *result)
{
	ARRAY_TYPE(seq_range) vuids;
	size_t orig_size;
	unsigned int i;

	orig_size = level->args_matches->used;
	fts_search_serialize(level->args_matches, args);
	if (orig_size > 0) {
		if (level->args_matches->used != orig_size * 2 ||
		    memcmp(level->args_matches->data,
			   CONST_PTR_OFFSET(level->args_matches->data,
					    orig_size), orig_size) != 0)
			i_panic("incompatible fts backends for namespaces");
		buffer_set_used_size(level->args_matches, orig_size);
	}

	t_array_init(&vuids, 64);
	for (i = 0; result->box_results[i].box != NULL; i++) {
		struct fts_result *br = &result->box_results[i];

		array_clear(&vuids);
		if (array_is_created(&br->definite_uids)) {
			fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
				br->box, &br->definite_uids, &vuids);
		}
		uid_range_to_seqs(fctx, &vuids, &level->definite_seqs);

		array_clear(&vuids);
		if (array_is_created(&br->maybe_uids)) {
			fctx->box->virtual_vfuncs->get_virtual_uids(fctx->box,
				br->box, &br->maybe_uids, &vuids);
		}
		uid_range_to_seqs(fctx, &vuids, &level->maybe_seqs);

		if (array_is_created(&br->scores))
			level_scores_add_vuids(fctx->box, level, br);
	}
	return 0;
}

static int fts_search_lookup_level_multi(struct fts_search_context *fctx,
					 struct mail_search_arg *args,
					 bool and_args)
{
	enum fts_lookup_flags flags = fctx->flags |
		(and_args ? FTS_LOOKUP_FLAG_AND_ARGS : 0);
	ARRAY_TYPE(mailboxes) mailboxes_arr, tmp_mailboxes;
	struct mailbox *const *mailboxes;
	struct fts_backend *backend;
	struct fts_search_level *level;
	struct fts_multi_result result;
	unsigned int i, j, mailbox_count;

	p_array_init(&mailboxes_arr, fctx->result_pool, 8);
	fctx->box->virtual_vfuncs->get_virtual_backend_boxes(fctx->box,
		&mailboxes_arr, TRUE);
	array_sort(&mailboxes_arr, mailbox_cmp_fts_backend);

	i_zero(&result);
	result.pool = fctx->result_pool;

	level = array_append_space(&fctx->levels);
	level->args_matches = buffer_create_dynamic(fctx->result_pool, 16);
	p_array_init(&level->score_map, fctx->result_pool, 1);

	mailboxes = array_get(&mailboxes_arr, &mailbox_count);
	t_array_init(&tmp_mailboxes, mailbox_count);
	for (i = 0; i < mailbox_count; i = j) {
		array_clear(&tmp_mailboxes);
		array_append(&tmp_mailboxes, &mailboxes[i], 1);

		backend = fts_mailbox_backend(mailboxes[i]);
		for (j = i + 1; j < mailbox_count; j++) {
			if (fts_mailbox_backend(mailboxes[j]) != backend)
				break;
			array_append(&tmp_mailboxes, &mailboxes[j], 1);
		}
		array_append_zero(&tmp_mailboxes);

		mail_search_args_reset(args, TRUE);
		if (fts_backend_lookup_multi(backend,
					     array_idx(&tmp_mailboxes, 0),
					     args, flags, &result) < 0)
			return -1;

		if (multi_add_lookup_result(fctx, level, args, &result) < 0)
			return -1;
	}
	return 0;
}

static int fts_search_lookup_level(struct fts_search_context *fctx,
				   struct mail_search_arg *args,
				   bool and_args)
{
	int ret;

	T_BEGIN {
		ret = !fctx->virtual_mailbox ?
			fts_search_lookup_level_single(fctx, args, and_args) :
			fts_search_lookup_level_multi(fctx, args, and_args);
	} T_END;
	if (ret < 0)
		return -1;

	for (; args != NULL; args = args->next) {
		if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
			continue;

		if (fts_search_lookup_level(fctx, args->value.subargs,
					    args->type == SEARCH_SUB) < 0)
			return -1;
	}
	return 0;
}

static void
fts_search_merge_scores_and(ARRAY_TYPE(fts_score_map) *dest,
			    const ARRAY_TYPE(fts_score_map) *src)
{
	struct fts_score_map *dest_map;
	const struct fts_score_map *src_map;
	unsigned int desti, srci, dest_count, src_count;

	dest_map = array_get_modifiable(dest, &dest_count);
	src_map = array_get(src, &src_count);

	/* arg_scores are summed to current scores. we could drop UIDs that
	   don't exist in both, but that's just extra work so don't bother */
	for (desti = srci = 0; desti < dest_count && srci < src_count;) {
		if (dest_map[desti].uid < src_map[srci].uid)
			desti++;
		else if (dest_map[desti].uid > src_map[srci].uid)
			srci++;
		else {
			if (dest_map[desti].score < src_map[srci].score)
				dest_map[desti].score = src_map[srci].score;
			desti++; srci++;
		}
	}
}

static void
fts_search_merge_scores_or(ARRAY_TYPE(fts_score_map) *dest,
			   const ARRAY_TYPE(fts_score_map) *src)
{
	ARRAY_TYPE(fts_score_map) src2;
	const struct fts_score_map *src_map, *src2_map;
	unsigned int srci, src2i, src_count, src2_count;

	t_array_init(&src2, array_count(dest));
	array_append_array(&src2, dest);
	array_clear(dest);

	src_map = array_get(src, &src_count);
	src2_map = array_get(&src2, &src2_count);

	/* add any missing UIDs to current scores. if any existing UIDs have
	   lower scores than in arg_scores, increase them. */
	for (srci = src2i = 0; srci < src_count || src2i < src2_count;) {
		if (src2i == src2_count ||
		    src_map[srci].uid < src2_map[src2i].uid) {
			array_append(dest, &src_map[srci], 1);
			srci++;
		} else if (srci == src_count ||
			   src_map[srci].uid > src2_map[src2i].uid) {
			array_append(dest, &src2_map[src2i], 1);
			src2i++;
		} else {
			i_assert(src_map[srci].uid == src2_map[src2i].uid);
			if (src_map[srci].score > src2_map[src2i].score)
				array_append(dest, &src_map[srci], 1);
			else
				array_append(dest, &src2_map[src2i], 1);
			srci++; src2i++;
		}
	}
}

static void
fts_search_merge_scores_level(struct fts_search_context *fctx,
			      struct mail_search_arg *args, unsigned int *idx,
			      bool and_args, ARRAY_TYPE(fts_score_map) *scores)
{
	const struct fts_search_level *level;
	ARRAY_TYPE(fts_score_map) arg_scores;

	i_assert(array_count(scores) == 0);

	/*
	   The (simplified) args can look like:

	   A and B and (C or D) and (E or F) and ...
	   A or B or (C and D) or (E and F) or ...

	   The A op B part's scores are in level->scores. The child args'
	   scores are in the sub levels' scores.
	*/

	level = array_idx(&fctx->levels, *idx);
	array_append_array(scores, &level->score_map);

	t_array_init(&arg_scores, 64);
	for (; args != NULL; args = args->next) {
		if (args->type != SEARCH_OR && args->type != SEARCH_SUB)
			continue;

		*idx += 1;
		array_clear(&arg_scores);
		fts_search_merge_scores_level(fctx, args->value.subargs, idx,
					      args->type == SEARCH_OR,
					      &arg_scores);

		if (and_args)
			fts_search_merge_scores_and(scores, &arg_scores);
		else
			fts_search_merge_scores_or(scores, &arg_scores);
	}
}

static void fts_search_merge_scores(struct fts_search_context *fctx)
{
	unsigned int idx = 0;

	fts_search_merge_scores_level(fctx, fctx->args->args, &idx,
				      TRUE, &fctx->scores->score_map);
}

void fts_search_lookup(struct fts_search_context *fctx)
{
	uint32_t last_uid, seq1, seq2;

	i_assert(array_count(&fctx->levels) == 0);
	i_assert(fctx->args->simplified);

	if (fts_backend_refresh(fctx->backend) < 0)
		return;
	if (fts_backend_get_last_uid(fctx->backend, fctx->box, &last_uid) < 0)
		return;
	mailbox_get_seq_range(fctx->box, last_uid+1, (uint32_t)-1,
			      &seq1, &seq2);
	fctx->first_unindexed_seq = seq1 != 0 ? seq1 : (uint32_t)-1;

	if ((fctx->backend->flags & FTS_BACKEND_FLAG_TOKENIZED_INPUT) != 0) {
		if (fts_search_args_expand(fctx->backend, fctx->args) < 0)
			return;
	}
	fts_search_serialize(fctx->orig_matches, fctx->args->args);

	if (fts_search_lookup_level(fctx, fctx->args->args, TRUE) == 0) {
		fctx->fts_lookup_success = TRUE;
		fts_search_merge_scores(fctx);
	}

	fts_search_deserialize(fctx->args->args, fctx->orig_matches);
	fts_backend_lookup_done(fctx->backend);
}