Mercurial > dovecot > core-2.2
changeset 18570:ac6ae1b9f5c4
lib-fts: Minor fts-tokenizer-address cleanups
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 09 May 2015 12:50:11 +0300 |
parents | 7c1fe66e8855 |
children | 3363fadc2e13 |
files | src/lib-fts/fts-tokenizer-address.c |
diffstat | 1 files changed, 8 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:48:13 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:50:11 2015 +0300
@@ -23,8 +23,7 @@
 	struct fts_tokenizer tokenizer;
 	enum email_address_parser_state state;
 	string_t *last_word;
-	string_t *parent_data; /* Copy of input data between tokens.
-	                          TODO: could be buffer_t maybe */
+	string_t *parent_data; /* Copy of input data between tokens. */
 	bool search;
 };
 
@@ -94,34 +93,27 @@
    Returns size that can be skipped. */
 static size_t skip_nonlocal_part(const unsigned char *data, size_t size)
 {
-	const unsigned char *p = data;
 	size_t skip = 0;
 
 	/* Yes, a dot can start an address. De facto before de jure. */
-	while ( skip < size && (!IS_ATEXT(*p) && *p != '.')) {
+	while (skip < size && (!IS_ATEXT(data[skip]) && data[skip] != '.'))
 		skip++;
-		p++;
-	}
 	return skip;
 }
 
-/* TODO:
- - DONT dereference *p past size!
-*/
 static enum email_address_parser_state
 fts_tokenizer_email_address_parse_local(struct email_address_fts_tokenizer *tok,
                                         const unsigned char *data, size_t size,
                                         size_t *skip_r)
 {
 	size_t pos = 0;
-	const unsigned char *p = data;
 	bool at = FALSE;
 
-	while (pos < size && (IS_ATEXT(*p) || (*p == '@' || *p == '.'))) {
-		if (*p == '@')
+	while (pos < size && (IS_ATEXT(data[pos]) ||
+			      data[pos] == '@' || data[pos] == '.')) {
+		if (data[pos] == '@')
 			at = TRUE;
 		pos++;
-		p++;
 		if (at)
 			break;
 	}
@@ -133,13 +125,13 @@
 	}
 
 	/* localpart, @ not included yet */
-	if (pos > 0 && (IS_ATEXT(*(p-1)) || *(p-1) == '.')) {
+	if (pos > 0 && (IS_ATEXT(data[pos-1]) || data[pos-1] == '.')) {
 		str_append_n(tok->last_word, data, pos);
 		*skip_r = pos;
 		return EMAIL_ADDRESS_PARSER_STATE_LOCALPART;
 	}
 	/* not a localpart. skip past rest of no-good chars. */
-	pos += skip_nonlocal_part(p, size - pos);
+	pos += skip_nonlocal_part(data+pos, size - pos);
 	*skip_r = pos;
 	return EMAIL_ADDRESS_PARSER_STATE_NONE;
 }
@@ -153,11 +145,6 @@
 	return p[1] == '\0';
 }
 
-/* TODO:
- - allow address literals
- - reject "@..."
- - reject "@.host.tld"
-*/
 static enum email_address_parser_state
 fts_tokenizer_email_address_parse_domain(struct email_address_fts_tokenizer *tok,
                                          const unsigned char *data, size_t size,
@@ -196,6 +183,7 @@
 	if (tok->tokenizer.parent != NULL)
 		str_append_n(tok->parent_data, data, size);
 }
+
 static int
 fts_tokenizer_email_address_next(struct fts_tokenizer *_tok,
                                  const unsigned char *data, size_t size,