view src/lib-fts/fts-filter-normalizer-icu.c @ 22649:bb7c452e3662

director: Include peak output buffer size in director connection log messages
author Timo Sirainen <timo.sirainen@dovecot.fi>
date Sun, 05 Nov 2017 22:27:41 +0200
parents 2e2563132d5f
children cb108f786fb4
line wrap: on
line source

/* Copyright (c) 2014-2017 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "buffer.h"
#include "str.h"
#include "unichar.h" /* unicode replacement char */
#include "fts-filter-common.h"
#include "fts-filter-private.h"
#include "fts-language.h"

#ifdef HAVE_LIBICU
#include "fts-icu.h"

struct fts_filter_normalizer_icu {
	struct fts_filter filter;
	pool_t pool;
	const char *transliterator_id;

	UTransliterator *transliterator;
	buffer_t *utf16_token, *trans_token;
	string_t *utf8_token;
};

static void fts_filter_normalizer_icu_destroy(struct fts_filter *filter)
{
	struct fts_filter_normalizer_icu *np =
		(struct fts_filter_normalizer_icu *)filter;

	if (np->transliterator != NULL)
		utrans_close(np->transliterator);
	pool_unref(&np->pool);
}

static int
fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
				 const char *const *settings,
				 struct fts_filter **filter_r,
				 const char **error_r)
{
	struct fts_filter_normalizer_icu *np;
	pool_t pp;
	unsigned int i, max_length = 250;
	const char *id = "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC; [\\x20] Remove";

	for (i = 0; settings[i] != NULL; i += 2) {
		const char *key = settings[i], *value = settings[i+1];

		if (strcmp(key, "id") == 0) {
			id = value;
		} else if (strcmp(key, "maxlen") == 0) {
			if (str_to_uint(value, &max_length) < 0 ||
			    max_length == 0) {
				*error_r = t_strdup_printf("Invalid icu maxlen setting: %s", value);
				return -1;
			}
		} else {
			*error_r = t_strdup_printf("Unknown setting: %s", key);
			return -1;
		}
	}

	pp = pool_alloconly_create(MEMPOOL_GROWING"fts_filter_normalizer_icu",
	                           sizeof(struct fts_filter_normalizer_icu));
	np = p_new(pp, struct fts_filter_normalizer_icu, 1);
	np->pool = pp;
	np->filter = *fts_filter_normalizer_icu;
	np->transliterator_id = p_strdup(pp, id);
	np->utf16_token = buffer_create_dynamic(pp, 128);
	np->trans_token = buffer_create_dynamic(pp, 128);
	np->utf8_token = buffer_create_dynamic(pp, 128);
	np->filter.max_length = max_length;
	*filter_r = &np->filter;
	return 0;
}

static int
fts_filter_normalizer_icu_filter(struct fts_filter *filter, const char **token,
				 const char **error_r)
{
	struct fts_filter_normalizer_icu *np =
		(struct fts_filter_normalizer_icu *)filter;

	if (np->transliterator == NULL)
		if (fts_icu_transliterator_create(np->transliterator_id,
		                                  &np->transliterator,
		                                  error_r) < 0)
			return -1;

	fts_icu_utf8_to_utf16(np->utf16_token, *token);
	buffer_append_zero(np->utf16_token, 2);
	buffer_set_used_size(np->utf16_token, np->utf16_token->used-2);
	buffer_set_used_size(np->trans_token, 0);
	if (fts_icu_translate(np->trans_token, np->utf16_token->data,
			      np->utf16_token->used / sizeof(UChar),
			      np->transliterator, error_r) < 0)
		return -1;

	if (np->trans_token->used == 0)
		return 0;

	fts_icu_utf16_to_utf8(np->utf8_token, np->trans_token->data,
			      np->trans_token->used / sizeof(UChar));
	fts_filter_truncate_token(np->utf8_token, np->filter.max_length);
	*token = str_c(np->utf8_token);
	return 1;
}

#else

static int
fts_filter_normalizer_icu_create(const struct fts_language *lang ATTR_UNUSED,
				 const char *const *settings ATTR_UNUSED,
				 struct fts_filter **filter_r ATTR_UNUSED,
				 const char **error_r)
{
	*error_r = "libicu support not built in";
	return -1;
}

static int
fts_filter_normalizer_icu_filter(struct fts_filter *filter ATTR_UNUSED,
				 const char **token ATTR_UNUSED,
				 const char **error_r ATTR_UNUSED)
{
	return -1;
}

static void
fts_filter_normalizer_icu_destroy(struct fts_filter *normalizer ATTR_UNUSED)
{
}

#endif

static const struct fts_filter fts_filter_normalizer_icu_real = {
	.class_name = "normalizer-icu",
	.v = {
		fts_filter_normalizer_icu_create,
		fts_filter_normalizer_icu_filter,
		fts_filter_normalizer_icu_destroy
	}
};

const struct fts_filter *fts_filter_normalizer_icu =
	&fts_filter_normalizer_icu_real;