Mercurial > dovecot > core-2.2
changeset 18603:e4b62ba0fb5a
lib-fts: Various improvements to test-fts-tokenizer
field | value |
---|---|
author | Timo Sirainen <tss@iki.fi> |
date | Sat, 09 May 2015 18:28:04 +0300 |
parents | 7542e3be6721 |
children | c469d8f4cde7 |
files | src/lib-fts/test-fts-tokenizer.c |
diffstat | 1 files changed, 133 insertions(+), 430 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/test-fts-tokenizer.c Sat May 09 18:00:58 2015 +0300 +++ b/src/lib-fts/test-fts-tokenizer.c Sat May 09 18:28:04 2015 +0300 @@ -1,16 +1,30 @@ /* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */ #include "lib.h" -#include "sha2.h" -#include "hex-binary.h" +#include "unichar.h" #include "test-common.h" #include "fts-tokenizer.h" #include "fts-tokenizer-private.h" -/* TODO: fix including and linking of this. */ -/* #include "fts-tokenizer-generic-private.h" */ +#include "fts-tokenizer-generic-private.h" #include <stdlib.h> +#define TEST_INPUT_TEXT \ + "hello world\r\n\nAnd there\twas: text galore, " \ + "abc@example.com, " \ + "Bar Baz <bar@example.org>, " \ + "foo@domain " \ + "1234567890123456789012345678ä," \ + "12345678901234567890123456789ä," \ + "123456789012345678901234567890ä," \ + "and longlonglongabcdefghijklmnopqrstuvwxyz more.\n\n " \ + "(\"Hello world\")3.14 3,14 last" +#define TEST_INPUT_ADDRESS \ + "@invalid invalid@ Abc Dfg <abc.dfg@example.com>, " \ + "Bar Baz <bar@example.org>" \ + "Foo Bar (comment)foo.bar@host.example.org " \ + "foo, foo@domain" + static void test_fts_tokenizer_find(void) { test_begin("fts tokenizer find"); @@ -19,34 +33,79 @@ test_end(); } +static void +test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input, + const char *const *expected_output) +{ + const unsigned char *input = (const unsigned char *)_input; + const char *token; + unsigned int i, max, outi, char_len, input_len = strlen(_input); + + /* test all input at once */ + outi = 0; + while (fts_tokenizer_next(tok, input, input_len, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + test_assert(expected_output[outi] == NULL); + + /* test input one byte at a time */ + for (i = outi = 0; i < input_len; i += char_len) { + char_len = 
uni_utf8_char_bytes(input[i]); + while (fts_tokenizer_next(tok, input+i, char_len, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + } + while (fts_tokenizer_final(tok, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + test_assert(expected_output[outi] == NULL); + + /* test input in random chunks */ + for (i = outi = 0; i < input_len; i += char_len) { + max = rand() % (input_len - i) + 1; + for (char_len = 0; char_len < max; ) + char_len += uni_utf8_char_bytes(input[i+char_len]); + while (fts_tokenizer_next(tok, input+i, char_len, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + } + while (fts_tokenizer_final(tok, &token) > 0) { + test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi); + outi++; + } + test_assert(expected_output[outi] == NULL); +} + static void test_fts_tokenizer_generic_only(void) { - static const unsigned char input[] = - "hello world\r\nAnd there\twas: text " - "galore, and longlonglongabcdefghijklmnopqrstuvwxyz more.\n\n (\"Hello world\")last "; + static const char input[] = TEST_INPUT_TEXT; static const char *const expected_output[] = { "hello", "world", "And", "there", "was", "text", "galore", + "abc", "example", "com", "Bar", "Baz", + "bar", "example", "org", "foo", "domain", + "1234567890123456789012345678ä", + "12345678901234567890123456789", + "123456789012345678901234567890", "and", "longlonglongabcdefghijklmnopqr", - "more", "Hello", "world", "last", NULL + "more", "Hello", "world", "3", "14", "3", "14", "last", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; + const char *error; test_begin("fts tokenizer generic simple"); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0); -/*TODO: Uncomment when fts-tokenizer-generic-private.h inclusion is fixed */ -/*test_assert(((struct 
generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE);*/ - while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_assert(((struct generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE); + + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); test_end(); } @@ -55,7 +114,7 @@ { /* with Unicode(utf8) U+FF01(ef bc 81)(U+2000(e2 80 80) and U+205A(e2 81 9a) and U+205F(e2 81 9f )*/ - static const unsigned char input[] = + static const char input[] = "hello\xEF\xBC\x81world\r\nAnd\xE2\x80\x80there\twas: text " "galore\xE2\x81\x9F""and\xE2\x81\x9Amore.\n\n"; static const char *const expected_output[] = { @@ -64,61 +123,12 @@ "and", "more", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; + const char *error; test_begin("fts tokenizer generic simple with Unicode whitespace"); - fts_tokenizer_register(fts_tokenizer_generic); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0); - while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); - test_end(); -} - -static void test_fts_tokenizer_char_generic_only(void) -{ - static const unsigned char input[] = - "abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; - static const char *const expected_output[] = { - "abc", "example", "com", "Bar", "Baz", - "bar", "example", "org", "foo", "domain", NULL - 
}; - struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; - - test_begin("fts tokenizer generic simple input one character at a time"); - fts_tokenizer_register(fts_tokenizer_generic); - - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0); - - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? - fts_tokenizer_next(tok, &input[i], 1, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); test_end(); } @@ -126,34 +136,25 @@ static void test_fts_tokenizer_generic_tr29_only(void) { - static const unsigned char input[] = - "hello world\r\n\nAnd there\twas: text " - "galore, and more.\n\n (\"Hello world\")3.14 3,14 last" - " longlonglongabcdefghijklmnopqrstuvwxyz 1."; + static const char input[] = TEST_INPUT_TEXT; static const char *const expected_output[] = { "hello", "world", "And", "there", "was", "text", "galore", - "and", "more", "Hello", "world", "3.14", - "3,14", "last", "longlonglongabcdefghijklmnopqr", "1", NULL + "abc", "example.com", "Bar", "Baz", + "bar", "example.org", "foo", "domain", + "1234567890123456789012345678ä", + "12345678901234567890123456789", + "123456789012345678901234567890", + "and", "longlonglongabcdefghijklmnopqr", + "more", "Hello", "world", "3.14", "3,14", "last", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; + const char *error; test_begin("fts tokenizer generic TR29"); - fts_tokenizer_register(fts_tokenizer_generic); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); - while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - 
} - while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); test_end(); } @@ -163,7 +164,7 @@ { /* with Unicode(utf8) U+2000(e2 80 80) and U+205A(e2 81 9a) and U+205F(e2 81 9f)*/ - static const unsigned char input[] = + static const char input[] = "hello world\r\nAnd\xE2\x80\x80there\twas: text " "galore\xE2\x81\x9F""and\xE2\x81\x9Amore.\n\n"; static const char *const expected_output[] = { @@ -172,404 +173,112 @@ "and", "more", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; + const char *error; test_begin("fts tokenizer generic TR29 with Unicode whitespace"); - fts_tokenizer_register(fts_tokenizer_generic); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); - while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { - test_assert(strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); test_end(); } static void test_fts_tokenizer_generic_tr29_midnumlet_end(void) { /* u+FF0E is EF BC 8E */ - static const unsigned char input[] = + static const char input[] = "hello world\xEF\xBC\x8E"; static const char *const expected_output[] = { "hello", "world", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; + const char *error; test_begin("fts tokenizer generic TR29 with MinNumLet U+FF0E at end"); - fts_tokenizer_register(fts_tokenizer_generic); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, 
&tok, &error) == 0); - while (fts_tokenizer_next(tok, input, sizeof(input)-1, &token) > 0) { - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - while (fts_tokenizer_next(tok, NULL, 0, &token) > 0) { - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); - test_end(); -} - -static void test_fts_tokenizer_char_generic_tr29_only(void) -{ - static const unsigned char input[] = - "abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; - static const char *const expected_output[] = { - "abc", "example.com", "Bar", "Baz", - "bar", "example.org", "foo", "domain", NULL - }; - struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; - - test_begin("fts tokenizer generic TR29 input one character at a time"); - fts_tokenizer_register(fts_tokenizer_generic); - - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0); - - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], 1, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_generic); test_end(); } -static void test_fts_tokenizer_line_address_only(void) +static void test_fts_tokenizer_address_only(void) { - static const char *const input[] = { - "abc@example.com", - " Bar Baz <bar@example.org>", - "foo@domain", - " moro foo@domain Bar Baz <bar@example.org>" - }; + static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { - "abc@example.com", "bar@example.org", - "foo@domain", "foo@domain", "bar@example.org", NULL + "abc.dfg@example.com", "bar@example.org", + "foo.bar@host.example.org", "foo@domain", NULL }; struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; - - test_begin("fts tokenizer email address only, input one line at a time"); - fts_tokenizer_register(fts_tokenizer_email_address); - - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0); - - for (i = 0; i <= N_ELEMENTS(input);) { - ret = i < N_ELEMENTS(input) ? 
- fts_tokenizer_next(tok, (unsigned char *)input[i], - strlen(input[i]), &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_email_address); - test_end(); + const char *error; -} -static void test_fts_tokenizer_char_address_only(void) -{ - static const unsigned char input[] = - "@invalid invalid@ abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; - static const char *const expected_output[] = { - "abc@example.com", "bar@example.org", - "foo@domain", NULL - }; - struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; - - test_begin("fts tokenizer email address only, input one character at a time"); - fts_tokenizer_register(fts_tokenizer_email_address); + test_begin("fts tokenizer email address only"); test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0); - - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], 1, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_email_address); test_end(); } -static void test_fts_tokenizer_rand_address_only(void) +static void test_fts_tokenizer_address_parent(void) { - static const unsigned char input[] = - "@invalid invalid@ Abc Dfg <abc.dfg@example.com>, " - "Foo Bar (comment)foo.bar@host.example.org foo "; - + static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { - "abc.dfg@example.com", - "foo.bar@host.example.org", - NULL - }; - struct fts_tokenizer *tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i, step, step_max = 10; - int ret; - - test_begin("fts tokenizer email address, input random length"); - fts_tokenizer_register(fts_tokenizer_email_address); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, - NULL, &tok, &error) == 0); - step = rand() % step_max + 1; - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], step, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i += step; - step = rand() % step_max + 1; - step = I_MIN(step, sizeof(input) - i); - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unregister(fts_tokenizer_email_address); - test_end(); -} - -static void test_fts_tokenizer_address_char(void) -{ - static const unsigned char input[] = - "@invalid invalid@ abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; - static const char *const expected_output[] = { - "invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz", - "bar", "example", "org", "bar@example.org", - "foo", "domain", "foo@domain", NULL + "invalid", "invalid", "Abc", "Dfg", "abc", "dfg", "example", "com", "abc.dfg@example.com", + "Bar", "Baz", "bar", "example", "org", "bar@example.org", + "Foo", "Bar", "comment", "foo", "bar", "host", "example", "org", "foo.bar@host.example.org", + "foo", "foo", "domain", "foo@domain", NULL }; struct fts_tokenizer *tok, *gen_tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; + const char *error; - test_begin("fts tokenizer email address + parent, input one character at a time"); - + test_begin("fts tokenizer email address + parent"); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0); - - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], 1, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(*eopp != NULL); - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); fts_tokenizer_unref(&tok); fts_tokenizer_unref(&gen_tok); test_end(); } -static void test_fts_tokenizer_address_line(void) -{ - static const char *const input[] = { - "@invalid invalid@ abc@example.com, ", - "Bar Baz <bar@example.org>, ", - "foo@domain, ", - "foo@domain Bar Baz <bar@example.org>, " - }; - static const char *const expected_output[] = { - "invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz", - "bar", "example", "org", "bar@example.org", - "foo", "domain", "foo@domain", - "foo", "domain", "foo@domain", "Bar", "Baz", - "bar", "example", "org", "bar@example.org", NULL - }; - struct fts_tokenizer *tok, *gen_tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i; - int ret; - - test_begin("fts tokenizer email address + parent, input one line at a time"); - - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0); - - for (i = 0; i <= N_ELEMENTS(input);) { - ret = i < N_ELEMENTS(input) ? 
- fts_tokenizer_next(tok, (unsigned char *)input[i], - strlen(input[i]), &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unref(&gen_tok); - test_end(); - -} - -static void test_fts_tokenizer_address_rand(void) -{ - static const unsigned char input[] = - "@invalid invalid@ abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; - static const char *const expected_output[] = { - "invalid", "invalid", "abc", "example", "com", "abc@example.com", "Bar", "Baz", - "bar", "example", "org", "bar@example.org", - "foo", "domain", "foo@domain", NULL - }; - struct fts_tokenizer *tok, *gen_tok; - const char * const *eopp = expected_output; - const char *token, *error; - unsigned int i, step, step_max = 10; - int ret; - - test_begin("fts tokenizer email address + parent, input random length"); - fts_tokenizer_register(fts_tokenizer_generic); - fts_tokenizer_register(fts_tokenizer_email_address); - - test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); - test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0); - - //srand(1424142100); /* had a bug */ - step = rand() % step_max + 1; - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], step, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i += step; - step = rand() % step_max + 1; - step = I_MIN(step, sizeof(input) - i); - continue; - } - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); - fts_tokenizer_unref(&tok); - fts_tokenizer_unref(&gen_tok); - fts_tokenizer_unregister(fts_tokenizer_generic); - fts_tokenizer_unregister(fts_tokenizer_email_address); - test_end(); -} - static void test_fts_tokenizer_address_search(void) { - static const unsigned char input[] = - "@invalid invalid@ abc@example.com, " - "Bar Baz <bar@example.org>, " - "foo@domain"; + static const char input[] = TEST_INPUT_ADDRESS; static const char *const expected_output[] = { - "invalid", "invalid", "abc@example.com", "Bar", "Baz", - "bar@example.org", "foo@domain", NULL + "invalid", "invalid", "Abc", "Dfg", "abc.dfg@example.com", + "Bar", "Baz", "bar@example.org", + "Foo", "Bar", "comment", "foo.bar@host.example.org", + "foo", "foo@domain", NULL }; - static const char *const settings[] = { "search", "" }; + static const char *const settings[] = { "search", "", NULL }; struct fts_tokenizer *tok, *gen_tok; - const char * const *eopp = expected_output; const char *token, *error; - unsigned int i; - int ret; - test_begin("fts tokenizer search email address + parent, input one character at a time"); - + test_begin("fts tokenizer search email address + parent"); test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0); test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0); - - for (i = 0; i <= sizeof(input)-1; ) { - ret = i < sizeof(input)-1 ? 
- fts_tokenizer_next(tok, &input[i], 1, &token) : - fts_tokenizer_next(tok, NULL, 0, &token); - if (ret == 0) { - i++; - continue; - } - test_assert(*eopp != NULL); - test_assert(null_strcmp(token, *eopp) == 0); - eopp++; - } - test_assert(*eopp == NULL); + test_tokenizer_inputoutput(tok, input, expected_output); /* make sure state is forgotten at EOF */ test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token) == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 && + test_assert(fts_tokenizer_final(tok, &token) > 0 && strcmp(token, "foo") == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0); + test_assert(fts_tokenizer_final(tok, &token) == 0); test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token) == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 && + test_assert(fts_tokenizer_final(tok, &token) > 0 && strcmp(token, "bar@baz") == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0); + test_assert(fts_tokenizer_final(tok, &token) == 0); test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token) == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 && + test_assert(fts_tokenizer_final(tok, &token) > 0 && strcmp(token, "foo") == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0); + test_assert(fts_tokenizer_final(tok, &token) == 0); /* test reset explicitly */ test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token) == 0); fts_tokenizer_reset(tok); test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token) == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) > 0 && + test_assert(fts_tokenizer_final(tok, &token) > 0 && strcmp(token, "b@c") == 0); - test_assert(fts_tokenizer_next(tok, NULL, 0, &token) == 0); - + test_assert(fts_tokenizer_final(tok, &token) == 0); fts_tokenizer_unref(&tok); fts_tokenizer_unref(&gen_tok); @@ -582,17 +291,11 @@ test_fts_tokenizer_find, test_fts_tokenizer_generic_only, 
test_fts_tokenizer_generic_unicode_whitespace, - test_fts_tokenizer_char_generic_only, test_fts_tokenizer_generic_tr29_only, test_fts_tokenizer_generic_tr29_unicode_whitespace, - test_fts_tokenizer_char_generic_tr29_only, test_fts_tokenizer_generic_tr29_midnumlet_end, - test_fts_tokenizer_char_address_only, - test_fts_tokenizer_line_address_only, - test_fts_tokenizer_rand_address_only, - test_fts_tokenizer_address_char, - test_fts_tokenizer_address_line, - test_fts_tokenizer_address_rand, + test_fts_tokenizer_address_only, + test_fts_tokenizer_address_parent, test_fts_tokenizer_address_search, NULL };