Mercurial > dovecot > core-2.2
changeset 13154:81e6ba752d98
fts: HTML parser now makes sure space is added for each `<tag>`
This could be smarter though, like not doing this for `<span>`, but it gets a
bit complex..
author | Timo Sirainen <tss@iki.fi> |
---|---|
date | Thu, 04 Aug 2011 13:05:26 +0300 |
parents | 9dccd061a8d9 |
children | f89d7ac7bbcd |
files | src/plugins/fts/fts-parser-html.c |
diffstat | 1 files changed, 10 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/plugins/fts/fts-parser-html.c	Wed Aug 03 20:44:43 2011 +0300
+++ b/src/plugins/fts/fts-parser-html.c	Thu Aug 04 13:05:26 2011 +0300
@@ -127,6 +127,15 @@
 	return i + 1;
 }
 
+static void parser_add_space(struct html_fts_parser *parser)
+{
+	const unsigned char *data = parser->output->data;
+
+	if (parser->output->used > 0 &&
+	    data[parser->output->used-1] != ' ')
+		buffer_append_c(parser->output, ' ');
+}
+
 static size_t
 parse_data(struct html_fts_parser *parser,
 	   const unsigned char *data, size_t size)
@@ -158,6 +167,7 @@
 			else if (c == '>') {
 				parser->state = parser->ignore_next_text ?
 					HTML_STATE_IGNORE : HTML_STATE_TEXT;
+				parser_add_space(parser);
 			}
 			break;
 		case HTML_STATE_TAG_QUOTED:
```