Mercurial > dovecot > core-2.2
annotate src/lib-fts/fts-tokenizer-common.c @ 22656:1789bf2a1e01
director: Make sure HOST-RESET-USERS isn't used with max_moving_users=0
The reset command would just hang in that case. doveadm would never have
sent this, so this is just an extra sanity check.
author | Timo Sirainen <timo.sirainen@dovecot.fi> |
---|---|
date | Sun, 05 Nov 2017 23:51:56 +0200 |
parents | 2e2563132d5f |
children | cb108f786fb4 |
rev | line source |
---|---|
21390
2e2563132d5f
Updated copyright notices to include the year 2017.
Stephan Bosch <stephan.bosch@dovecot.fi>
parents:
21264
diff
changeset
|
1 /* Copyright (c) 2016-2017 Dovecot authors, see the included COPYING file */ |
21264
8f33680c6722
global: Added missing copyright notices.
Stephan Bosch <stephan.bosch@dovecot.fi>
parents:
20384
diff
changeset
|
2 |
19933
159b933b617d
lib-fts: Lift helper function out of generic tokenizer.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
diff
changeset
|
3 #include "lib.h" |
159b933b617d
lib-fts: Lift helper function out of generic tokenizer.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
diff
changeset
|
4 #include "unichar.h" |
159b933b617d
lib-fts: Lift helper function out of generic tokenizer.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
diff
changeset
|
5 #include "fts-tokenizer-common.h" |
159b933b617d
lib-fts: Lift helper function out of generic tokenizer.
Teemu Huovila <teemu.huovila@dovecot.fi>
parents:
diff
changeset
|
/*
 * Drop an incomplete trailing UTF-8 character from a truncated token.
 *
 * data: the token bytes; only read, never modified.
 * len:  in/out token length in bytes. It is lowered to the offset of the
 *       last character's first byte when that character does not fit
 *       entirely in the token; otherwise it is left unchanged.
 *
 * An empty token (*len == 0) is returned unchanged.
 *
 * NOTE(review): UTF8_IS_START_SEQ(), uni_utf8_char_bytes() and i_assert()
 * come from the included project headers; the input is presumably expected
 * to be valid UTF-8 apart from the truncation - confirm against callers.
 */
void
fts_tokenizer_delete_trailing_partial_char(const unsigned char *data,
					   size_t *len)
{
	size_t pos;
	unsigned int char_bytes;

	/* an empty token has no last character. without this check *len-1
	   would wrap around (size_t is unsigned) and data[] would be read
	   far out of bounds. */
	if (*len == 0)
		return;

	/* the token is truncated - make sure the last character
	   exists entirely in the token. scan backwards for the byte that
	   starts the last character; the scan stops at index 0 even if no
	   start byte was seen. */
	for (pos = *len-1; pos > 0; pos--) {
		if (UTF8_IS_START_SEQ(data[pos]))
			break;
	}
	/* compare the length announced by that byte with the number of
	   bytes that are actually left in the token */
	char_bytes = uni_utf8_char_bytes(data[pos]);
	if (char_bytes != *len-pos) {
		/* the only accepted mismatch is a character that needs more
		   bytes than the token still has */
		i_assert(char_bytes > *len-pos);
		*len = pos;
	}
}
20383
9634c9d152c4
lib-fts: remove trailing period character from email-address
Baofeng Wang <baofeng.wang@dovecot.fi>
parents:
19933
diff
changeset
|
/*
 * Strip every trailing '.' and '-' byte from a token.
 *
 * data: the token bytes; only read, never modified.
 * len:  in/out token length in bytes. On return it is the length of the
 *       token without its trailing run of '.' and '-' bytes, which is 0
 *       when the token consists of nothing else.
 */
void fts_tokenizer_delete_trailing_invalid_char(const unsigned char *data,
						 size_t *len)
{
	size_t end;

	/* walk back from the end for as long as the last remaining byte is
	   one of the two characters that may not end the token */
	for (end = *len; end > 0; end--) {
		unsigned char last = data[end-1];

		if (last != '.' && last != '-')
			break;
	}
	*len = end;
}