changeset 18567:bcfe4c592427

lib-fts: Don't treat empty domains as valid addresses. This actually pretty much reverts the previous patch and just fixes the original code's chars_after_at() to work correctly. Also renamed the function to make it a bit clearer what was intended.
author Timo Sirainen <tss@iki.fi>
date Sat, 09 May 2015 12:39:21 +0300
parents b9f85e125639
children f4b2aa500cde
files src/lib-fts/fts-tokenizer-address.c src/lib-fts/test-fts-tokenizer.c
diffstat 2 files changed, 20 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:32:46 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:39:21 2015 +0300
@@ -199,6 +199,15 @@
 	return EMAIL_ADDRESS_PARSER_STATE_NONE;
 }
 
+static bool domain_is_empty(struct email_address_fts_tokenizer *tok)
+{
+	const char *p, *str = str_c(tok->last_word);
+
+	if ((p = strchr(str, '@')) == NULL) /* locate the first '@' in last_word */
+		return TRUE; /* no '@' at all, so there is no domain part */
+	return p[1] == '\0'; /* empty when nothing follows that '@' */
+}
+
 /* TODO:
  - allow address literals
  - reject "@..."
@@ -218,7 +227,7 @@
 	}
 	 /* A complete domain name */
 	if ((pos > 1 && pos < size) || /* non-atext after atext in this data*/
-	    pos < size) { /* non-atext after previous atext */
+	    (pos < size && !domain_is_empty(tok))) { /* non-atext after previous atext */
 		str_append_n(tok->last_word, data, pos);
 		*skip_r = pos;
 		return EMAIL_ADDRESS_PARSER_STATE_COMPLETE;
@@ -265,7 +274,8 @@
 		if (!tok->no_parent && str_len(tok->parent_data) > 0)
 			return fts_tokenizer_address_parent_data(tok, token_r);
 
-		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN)
+		if (tok->state == EMAIL_ADDRESS_PARSER_STATE_DOMAIN &&
+		    !domain_is_empty(tok))
 			return fts_tokenizer_address_current_token(tok, token_r);
 	}
 
--- a/src/lib-fts/test-fts-tokenizer.c	Sat May 09 12:32:46 2015 +0300
+++ b/src/lib-fts/test-fts-tokenizer.c	Sat May 09 12:39:21 2015 +0300
@@ -308,7 +308,7 @@
 static void test_fts_tokenizer_char_address_only(void)
 {
 	static const unsigned char input[] =
-		"@invalid  abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz <bar@example.org>, "
 		"foo@domain";
 	static const char *const expected_output[] = {
@@ -346,7 +346,7 @@
 static void test_fts_tokenizer_rand_address_only(void)
 {
 	static const unsigned char input[] =
-		"@invalid  Abc Dfg <abc.dfg@example.com>, "
+		"@invalid invalid@ Abc Dfg <abc.dfg@example.com>, "
 		"Foo Bar (comment)foo.bar@host.example.org foo ";
 
 	static const char *const expected_output[] = {
@@ -388,11 +388,11 @@
 static void test_fts_tokenizer_address_char(void)
 {
 	static const unsigned char input[] =
-		"@invalid  abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz <bar@example.org>, "
 		"foo@domain";
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain", NULL
 	};
@@ -430,13 +430,13 @@
 static void test_fts_tokenizer_address_line(void)
 {
 	static const char *const input[] = {
-		"@invalid  abc@example.com, ",
+		"@invalid invalid@ abc@example.com, ",
 		"Bar Baz <bar@example.org>, ",
 		"foo@domain, ",
 		"foo@domain Bar Baz <bar@example.org>, "
 	};
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain",
 		"foo", "domain", "foo@domain", "Bar", "Baz",
@@ -477,11 +477,11 @@
 static void test_fts_tokenizer_address_rand(void)
 {
 	static const unsigned char input[] =
-		"@invalid  abc@example.com, "
+		"@invalid invalid@ abc@example.com, "
 		"Bar Baz <bar@example.org>, "
 		"foo@domain";
 	static const char *const expected_output[] = {
-		"invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
+		"invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz",
 		"bar", "example", "org", "bar@example.org",
 		"foo", "domain", "foo@domain", NULL
 	};