Mercurial > dovecot > core-2.2
changeset 18573:60f07e741c57
lib-fts: Implemented "search" parameter to fts-tokenizer-address.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 09 May 2015 13:15:09 +0300 |
parents | 07597666aa29 |
children | eb6fbab30cf6 |
files | src/lib-fts/fts-tokenizer-address.c src/lib-fts/test-fts-tokenizer.c |
diffstat | 2 files changed, 67 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 13:01:45 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 13:15:09 2015 +0300
@@ -76,17 +76,30 @@
 	return 1;
 }
 
-static int
+static bool
 fts_tokenizer_address_parent_data(struct email_address_fts_tokenizer *tok,
                                   const char **token_r)
 {
-	/* TODO: search option removes address from data here. */
-	if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
-		i_debug("Would remove current token");
+	if (tok->tokenizer.parent == NULL || str_len(tok->parent_data) == 0)
+		return FALSE;
+
+	if (tok->search && tok->state >= EMAIL_ADDRESS_PARSER_STATE_DOMAIN) {
+		/* we're searching and we want to find only the full
+		   user@domain (not "user" and "domain"). we'll do this by
+		   not feeding the last user@domain to parent tokenizer. */
+		unsigned int parent_prefix_len =
+			str_len(tok->parent_data) - str_len(tok->last_word);
+		i_assert(str_len(tok->parent_data) >= str_len(tok->last_word) &&
+			 strcmp(str_c(tok->parent_data) + parent_prefix_len,
+				str_c(tok->last_word)) == 0);
+		str_truncate(tok->parent_data, parent_prefix_len);
+		if (str_len(tok->parent_data) == 0)
+			return FALSE;
+	}
 
 	*token_r = t_strdup(str_c(tok->parent_data));
 	str_truncate(tok->parent_data, 0);
-	return 1;
+	return TRUE;
 }
 
 /* Used to rewind past characters that can not be the start of a new localpart.
@@ -204,8 +217,8 @@
 	/* end of data, output lingering tokens.
 	   first the parents data, then possibly our token, if complete enough */
 	if (size == 0) {
-		if (tok->tokenizer.parent != NULL && str_len(tok->parent_data) > 0)
-			return fts_tokenizer_address_parent_data(tok, token_r);
+		if (fts_tokenizer_address_parent_data(tok, token_r))
+			return 1;
 
 		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN &&
 		    !domain_is_empty(tok))
@@ -254,16 +267,10 @@
 			break;
 
 		case EMAIL_ADDRESS_PARSER_STATE_COMPLETE:
-			/* skip tailing non-atext */
-			local_skip = skip_nonlocal_part(data+pos, size - pos);
-			*skip_r = pos + local_skip;
-			fts_tokenizer_address_update_parent(tok, data+pos,
-							    local_skip);
-			if (tok->tokenizer.parent != NULL)
-				return fts_tokenizer_address_parent_data(tok, token_r);
-			else {
-				return fts_tokenizer_address_current_token(tok, token_r);
-			}
+			*skip_r = pos;
+			if (fts_tokenizer_address_parent_data(tok, token_r))
+				return 1;
+			return fts_tokenizer_address_current_token(tok, token_r);
 		default:
 			i_unreached();
 		}
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 13:01:45 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 13:15:09 2015 +0300
@@ -518,6 +518,48 @@
 	test_end();
 }
 
+static void test_fts_tokenizer_address_search(void)
+{
+	static const unsigned char input[] =
+		"@invalid invalid@ abc@example.com, "
+		"Bar Baz <bar@example.org>, "
+		"foo@domain";
+	static const char *const expected_output[] = {
+		"invalid", "invalid", "abc@example.com", "Bar", "Baz",
+		"bar@example.org", "foo@domain", NULL
+	};
+	static const char *const settings[] = { "search", "" };
+	struct fts_tokenizer *tok, *gen_tok;
+	const char * const *eopp = expected_output;
+	const char *token, *error;
+	unsigned int i;
+	int ret;
+
+	test_begin("fts tokenizer search email address + parent, input one character at a time");
+	fts_tokenizers_init();
+
+	test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
+	test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
+
+	for (i = 0; i <= sizeof(input)-1; ) {
+		ret = i < sizeof(input)-1 ?
+			fts_tokenizer_next(tok, &input[i], 1, &token) :
+			fts_tokenizer_next(tok, NULL, 0, &token);
+		if (ret == 0) {
+			i++;
+			continue;
+		}
+		test_assert(*eopp != NULL);
+		test_assert(null_strcmp(token, *eopp) == 0);
+		eopp++;
+	}
+	test_assert(*eopp == NULL);
+	fts_tokenizer_unref(&tok);
+	fts_tokenizer_unref(&gen_tok);
+	fts_tokenizers_deinit();
+	test_end();
+}
+
 int main(void)
 {
 	static void (*test_functions[])(void) = {
@@ -534,6 +576,7 @@
 		test_fts_tokenizer_address_char,
 		test_fts_tokenizer_address_line,
 		test_fts_tokenizer_address_rand,
+		test_fts_tokenizer_address_search,
 		NULL
 	};
 