changeset 18788:3a5ea8cf2233

lib-fts: Use new uni_utf8_get_char*() interface
author Timo Sirainen <tss@iki.fi>
date Mon, 01 Jun 2015 22:14:19 +0300
parents 5f4742b69e7e
children 778b57788590
files src/lib-fts/fts-tokenizer-generic.c
diffstat 1 files changed, 7 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 22:11:55 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 22:14:19 2015 +0300
@@ -198,14 +198,13 @@
 	struct generic_fts_tokenizer *tok =
 		(struct generic_fts_tokenizer *)_tok;
 	size_t i, start = 0;
-	unsigned int char_size;
+	int char_size;
 	unichar_t c;
 	bool apostrophe;
 
 	for (i = 0; i < size; i += char_size) {
-		if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
-			i_unreached();
-		char_size = uni_utf8_char_bytes(data[i]);
+		char_size = uni_utf8_get_char_n(data + i, size - i, &c);
+		i_assert(char_size > 0);
 
 		apostrophe = IS_APOSTROPHE(c);
 		if (fts_simple_is_word_break(tok, c, apostrophe)) {
@@ -620,13 +619,14 @@
 	unichar_t c;
 	size_t i, char_start_i, start_pos = 0;
 	enum letter_type lt;
+	int char_size;
 
 	/* TODO: Process 8bit chars separately, to speed things up. */
 	for (i = 0; i < size; ) {
 		char_start_i = i;
-		if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
-			i_unreached();
-		i += uni_utf8_char_bytes(data[i]);
+		char_size = uni_utf8_get_char_n(data + i, size - i, &c);
+		i_assert(char_size > 0);
+		i += char_size;
 		lt = letter_type(c);
 
 		if (tok->prev_letter == LETTER_TYPE_NONE && is_nontoken(lt)) {