Mercurial > dovecot > core-2.2
changeset 18788:3a5ea8cf2233
lib-fts: Use new uni_utf8_get_char*() interface
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 01 Jun 2015 22:14:19 +0300 |
parents | 5f4742b69e7e |
children | 778b57788590 |
files | src/lib-fts/fts-tokenizer-generic.c |
diffstat | 1 files changed, 7 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 22:11:55 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 22:14:19 2015 +0300
@@ -198,14 +198,13 @@
 	struct generic_fts_tokenizer *tok =
 		(struct generic_fts_tokenizer *)_tok;
 	size_t i, start = 0;
-	unsigned int char_size;
+	int char_size;
 	unichar_t c;
 	bool apostrophe;
 
 	for (i = 0; i < size; i += char_size) {
-		if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
-			i_unreached();
-		char_size = uni_utf8_char_bytes(data[i]);
+		char_size = uni_utf8_get_char_n(data + i, size - i, &c);
+		i_assert(char_size > 0);
 
 		apostrophe = IS_APOSTROPHE(c);
 		if (fts_simple_is_word_break(tok, c, apostrophe)) {
@@ -620,13 +619,14 @@
 	unichar_t c;
 	size_t i, char_start_i, start_pos = 0;
 	enum letter_type lt;
+	int char_size;
 
 	/* TODO: Process 8bit chars separately, to speed things up. */
 	for (i = 0; i < size; ) {
 		char_start_i = i;
-		if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
-			i_unreached();
-		i += uni_utf8_char_bytes(data[i]);
+		char_size = uni_utf8_get_char_n(data + i, size - i, &c);
+		i_assert(char_size > 0);
+		i += char_size;
 		lt = letter_type(c);
 		if (tok->prev_letter == LETTER_TYPE_NONE && is_nontoken(lt)) {
```