Mercurial > dovecot > core-2.2
changeset 18568:f4b2aa500cde
lib-fts: Use rfc822-parser in fts-tokenizer-address instead of duplicating its code.
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Sat, 09 May 2015 12:41:59 +0300 |
parents | bcfe4c592427 |
children | 7c1fe66e8855 |
files | src/lib-fts/Makefile.am src/lib-fts/fts-tokenizer-address.c |
diffstat | 2 files changed, 7 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/src/lib-fts/Makefile.am	Sat May 09 12:39:21 2015 +0300
+++ b/src/lib-fts/Makefile.am	Sat May 09 12:41:59 2015 +0300
@@ -3,6 +3,7 @@
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/lib \
 	-I$(top_srcdir)/src/lib-test \
+	-I$(top_srcdir)/src/lib-mail \
 	$(LIBEXTTEXTCAT_CFLAGS) \
 	$(LIBICU_CFLAGS) \
 	-DUDHRDIR=\""$(top_srcdir)/src/lib-fts"\" \
@@ -101,8 +102,8 @@
 endif
 
 test_fts_tokenizer_SOURCES = test-fts-tokenizer.c
-test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo $(test_libs)
-test_fts_tokenizer_DEPENDENCIES = $(test_deps)
+test_fts_tokenizer_LDADD = fts-tokenizer.lo fts-tokenizer-generic.lo fts-tokenizer-address.lo ../lib-mail/libmail.la $(test_libs)
+test_fts_tokenizer_DEPENDENCIES = ../lib-mail/libmail.la $(test_deps)
 
 check: check-am check-test
 check-test: all-am
--- a/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:39:21 2015 +0300
+++ b/src/lib-fts/fts-tokenizer-address.c	Sat May 09 12:41:59 2015 +0300
@@ -3,8 +3,12 @@
 #include "lib.h"
 #include "str.h"
 #include "buffer.h"
+#include "rfc822-parser.h"
 #include "fts-tokenizer-private.h"
 
+#define IS_DTEXT(c) \
+	(rfc822_atext_chars[(int)(unsigned char)(c)] == 2)
+
 #define FTS_DEFAULT_NO_PARENT FALSE
 #define FTS_DEFAULT_SEARCH FALSE
 
@@ -25,60 +29,6 @@
 	bool search;
 };
 
-/*
-   Extracted from core rfc822-parser.c
-
-   atext        =       ALPHA / DIGIT / ; Any character except controls,
-                        "!" / "#" /     ;  SP, and specials.
-                        "$" / "%" /     ;  Used for atoms
-                        "&" / "'" /
-                        "*" / "+" /
-                        "-" / "/" /
-                        "=" / "?" /
-                        "^" / "_" /
-                        "`" / "{" /
-                        "|" / "}" /
-                        "~"
-
-  MIME:
-
-  token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
-              or tspecials>
-  tspecials :=  "(" / ")" / "<" / ">" / "@" /
-                "," / ";" / ":" / "\" / <">
-                "/" / "[" / "]" / "?" / "="
-
-  So token is same as dot-atom, except stops also at '/', '?' and '='.
-*/
-
-/* atext chars are marked with 1, alpha and digits with 2,
-   atext-but-mime-tspecials with 4 */
-unsigned char rfc822_atext_chars[256] = {
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
-	0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4, /* 32-47 */
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4, /* 48-63 */
-	0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */
-	1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */
-
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
-};
-
-#define IS_ATEXT(c) \
-	(rfc822_atext_chars[(int)(unsigned char)(c)] != 0)
-#define IS_DTEXT(c) \
-	(rfc822_atext_chars[(int)(unsigned char)(c)] == 2)
-
-
 static int
 fts_tokenizer_email_address_create(const char *const *settings,
 				   struct fts_tokenizer **tokenizer_r,