view src/lib-fts/fts-tokenizer-common.c @ 23017:c1d36f2575c7 default tip

lib-imap: Fix "Don't accept strings with NULs" cherry-pick
author Timo Sirainen <timo.sirainen@open-xchange.com>
date Thu, 29 Aug 2019 09:55:25 +0300
parents cb108f786fb4
children
line wrap: on
line source

/* Copyright (c) 2016-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"
#include "unichar.h"
#include "fts-tokenizer-common.h"
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* the token is truncated - make sure the last character
	   exists entirely in the token */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}
void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
		   size_t *len)
{
	size_t pos = *len;

	/* the token may contain '.' in the end - remove all of them. */
	while (pos > 0 &&
		  (data[pos-1] == '.' || data[pos-1] == '-'))
	    pos--;
	*len = pos;
}