Mercurial > dovecot > original-hg > dovecot-1.2
changeset 7187:d9b87e3ce6c8 HEAD
Treat replacement characters as non-indexed chars.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Tue, 22 Jan 2008 09:33:40 +0200 |
parents | d48c419a27ca |
children | febb2592e616 |
files | src/plugins/fts-squat/squat-trie.c |
diffstat | 1 files changed, 13 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/src/plugins/fts-squat/squat-trie.c Tue Jan 22 09:32:27 2008 +0200
+++ b/src/plugins/fts-squat/squat-trie.c Tue Jan 22 09:33:40 2008 +0200
@@ -821,12 +821,23 @@
 squat_data_normalize(struct squat_trie *trie, const unsigned char *data,
 		     unsigned int size)
 {
+	static const unsigned char replacement_utf8[] = { 0xef, 0xbf, 0xbd };
 	unsigned char *dest;
 	unsigned int i;
 
 	dest = t_malloc(size);
-	for (i = 0; i < size; i++)
-		dest[i] = trie->hdr.normalize_map[data[i]];
+	for (i = 0; i < size; i++) {
+		if (data[i] == replacement_utf8[0] && i + 2 < size &&
+		    data[i+1] == replacement_utf8[1] &&
+		    data[i+2] == replacement_utf8[2]) {
+			/* Don't index replacement character */
+			dest[i++] = 0;
+			dest[i++] = 0;
+			dest[i] = 0;
+		} else {
+			dest[i] = trie->hdr.normalize_map[data[i]];
+		}
+	}
 
 	return dest;
 }