changeset 13154:81e6ba752d98

fts: HTML parser now makes sure space is added for each <tag>. This could be smarter though, like not doing this for <span>, but it gets a bit complex..
author Timo Sirainen <tss@iki.fi>
date Thu, 04 Aug 2011 13:05:26 +0300
parents 9dccd061a8d9
children f89d7ac7bbcd
files src/plugins/fts/fts-parser-html.c
diffstat 1 files changed, 10 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/src/plugins/fts/fts-parser-html.c	Wed Aug 03 20:44:43 2011 +0300
+++ b/src/plugins/fts/fts-parser-html.c	Thu Aug 04 13:05:26 2011 +0300
@@ -127,6 +127,15 @@
 	return i + 1;
 }
 
+static void parser_add_space(struct html_fts_parser *parser)
+{	/* Append ' ' to parser->output unless it is empty or its last byte is already ' '. */
+	const unsigned char *data = parser->output->data;
+	/* The used > 0 test comes first so the used-1 index below is never evaluated at 0. */
+	if (parser->output->used > 0 &&
+	    data[parser->output->used-1] != ' ')
+		buffer_append_c(parser->output, ' ');
+}
+
 static size_t
 parse_data(struct html_fts_parser *parser,
 	   const unsigned char *data, size_t size)
@@ -158,6 +167,7 @@
 			else if (c == '>') {
 				parser->state = parser->ignore_next_text ?
 					HTML_STATE_IGNORE : HTML_STATE_TEXT;
+				parser_add_space(parser);
 			}
 			break;
 		case HTML_STATE_TAG_QUOTED: