# HG changeset patch
# User Timo Sirainen
# Date 1431164361 -10800
# Node ID bcfe4c592427de8a76cc64edb2cd34f1250554c9
# Parent  b9f85e125639d394b97cb5083fe7d837aa675f18
lib-fts: Don't treat empty domains as valid addresses.
This actually pretty much reverts the previous patch and just fixes the
original code's chars_after_at() to work correctly. Also renamed the function
to make it a bit clearer what was intended.

[review note] domain_is_empty() inspects tok->last_word and returns TRUE when
it contains no '@' at all, or when the character right after the first '@' is
the terminating NUL. The patch consults it in two places: the second
alternative of the "complete domain name" condition (pos < size now also
requires a non-empty domain), and the EMAIL_ADDRESS_PARSER_STATE_DOMAIN check
that precedes returning the current token. The test hunks add "invalid@" to
every input; the three address_* tests additionally expect one more "invalid"
token, while the two *_address_only hunks change the input only.
[review note] Some string literals in the test hunks (e.g. "Bar Baz , ") look
as if text has been dropped from this copy of the patch - TODO confirm against
the repository before applying.

diff -r b9f85e125639 -r bcfe4c592427 src/lib-fts/fts-tokenizer-address.c
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:32:46 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:39:21 2015 +0300
@@ -199,6 +199,15 @@
 	return EMAIL_ADDRESS_PARSER_STATE_NONE;
 }
 
+static bool domain_is_empty(struct email_address_fts_tokenizer *tok)
+{
+	const char *p, *str = str_c(tok->last_word);
+
+	if ((p = strchr(str, '@')) == NULL)
+		return TRUE;
+	return p[1] == '\0';
+}
+
 /* TODO:
  - allow address literals
  - reject "@..."
@@ -218,7 +227,7 @@
 	}
 	/* A complete domain name */
 	if ((pos > 1 && pos < size) || /* non-atext after atext in this data*/
-	    pos < size) { /* non-atext after previous atext */
+	    (pos < size && !domain_is_empty(tok))) { /* non-atext after previous atext */
 		str_append_n(tok->last_word, data, pos);
 		*skip_r = pos;
 		return EMAIL_ADDRESS_PARSER_STATE_COMPLETE;
@@ -265,7 +274,8 @@
 		if (!tok->no_parent && str_len(tok->parent_data) > 0)
 			return fts_tokenizer_address_parent_data(tok, token_r);
 
-		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
+		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN &&
+		    !domain_is_empty(tok))
 			return fts_tokenizer_address_current_token(tok, token_r);
 	}
 
diff -r b9f85e125639 -r bcfe4c592427 src/lib-fts/test-fts-tokenizer.c
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 12:32:46 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 12:39:21 2015 +0300
@@ -308,7 +308,7 @@
 static void test_fts_tokenizer_char_address_only(void)
 {
 	static const unsigned char input[] =
-		"@invalid abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz , "
 		"foo@domain";
 	static const char *const expected_output[] = {
@@ -346,7 +346,7 @@
 static void test_fts_tokenizer_rand_address_only(void)
 {
 	static const unsigned char input[] =
-		"@invalid Abc Dfg , "
+		"@invalid invalid@ Abc Dfg , "
 		"Foo Bar (comment)foo.bar@host.example.org foo ";
 
 	static const char *const expected_output[] = {
@@ -388,11 +388,11 @@
 static void test_fts_tokenizer_address_char(void)
 {
 	static const unsigned char input[] =
-		"@invalid abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz , "
 		"foo@domain";
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain", NULL
 	};
@@ -430,13 +430,13 @@
 static void test_fts_tokenizer_address_line(void)
 {
 	static const char *const input[] = {
-		"@invalid abc@example.com, ",
+		"@invalid invalid@ abc@example.com, ",
 		"Bar Baz , ",
 		"foo@domain, ",
 		"foo@domain Bar Baz , "
 	};
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain",
 		"foo", "domain", "foo@domain", "Bar", "Baz",
@@ -477,11 +477,11 @@
 static void test_fts_tokenizer_address_rand(void)
 {
 	static const unsigned char input[] =
-		"@invalid abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz , "
 		"foo@domain";
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain", NULL
 	};