Mercurial > dovecot > core-2.2
changeset 18771:04ea590951c1
lib-fts: tr29 cleanup - Avoid i++ in the for loop to avoid extra calculations
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Mon, 01 Jun 2015 21:11:55 +0300 |
parents | c9b2d4e228f5 |
children | 62b201a1ee06 |
files | src/lib-fts/fts-tokenizer-generic.c |
diffstat | 1 files changed, 4 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 21:10:11 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-generic.c	Mon Jun 01 21:11:55 2015 +0300
@@ -631,22 +631,22 @@
 	enum letter_type lt;
 	/* TODO: Process 8bit chars separately, to speed things up. */
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < size; ) {
 		char_start_i = i;
 		if (uni_utf8_get_char_n(data + i, size - i, &c) <= 0)
 			i_unreached();
-		i += uni_utf8_char_bytes(data[i]) - 1; /* Utf8 bytes > 1, for() handles the 1 byte increment. */
+		i += uni_utf8_char_bytes(data[i]);
 		lt = letter_type(c);
 		if (tok->prev_letter == LETTER_TYPE_NONE && is_nonword(lt)) {
 			/* TODO: test that start_skip works with multibyte utf8 chars */
-			start_skip = i + 1; /* Skip non-token chars at start of data */
+			start_skip = i; /* Skip non-token chars at start of data */
 			continue;
 		}
 		if (uni_found_word_boundary(tok, lt)) {
 			i_assert(char_start_i >= start_skip && size >= start_skip);
 			tok_append_truncated(tok, data + start_skip, char_start_i - start_skip);
-			*skip_r = i + 1;
+			*skip_r = i;
 			fts_tokenizer_generic_tr29_current_token(tok, token_r);
 			return 1;
 		}
```