changeset 19334:c02969e65b64

lib-mail, fts: Put application/xhtml+xml MIME parts through html parser as well.
author Timo Sirainen <tss@iki.fi>
date Tue, 27 Oct 2015 23:56:48 +0200
parents c7d384bc3964
children 3a6e503c9ee6
files src/lib-mail/mail-html2text.h src/lib-mail/message-snippet.c src/lib-mail/test-message-snippet.c src/plugins/fts/fts-parser-html.c
diffstat 4 files changed, 23 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/mail-html2text.h	Mon Oct 26 17:20:49 2015 +0200
+++ b/src/lib-mail/mail-html2text.h	Tue Oct 27 23:56:48 2015 +0200
@@ -12,4 +12,11 @@
 			 buffer_t *output);
 void mail_html2text_deinit(struct mail_html2text **ht);
 
+static inline bool /* TRUE if content_type is text/html or application/xhtml+xml */
+mail_html2text_content_type_match(const char *content_type)
+{ /* content_type is passed straight to strcasecmp(), so it must not be NULL */
+	return strcasecmp(content_type, "text/html") == 0 || /* case-insensitive */
+		strcasecmp(content_type, "application/xhtml+xml") == 0;
+}
+
 #endif
--- a/src/lib-mail/message-snippet.c	Mon Oct 26 17:20:49 2015 +0200
+++ b/src/lib-mail/message-snippet.c	Tue Oct 27 23:56:48 2015 +0200
@@ -115,7 +115,7 @@
 			ct = message_decoder_current_content_type(decoder);
 			if (ct == NULL)
 				/* text/plain */ ;
-			else if (strcasecmp(ct, "text/html") == 0) {
+			else if (mail_html2text_content_type_match(ct)) {
 				ctx.html2text = mail_html2text_init(MAIL_HTML2TEXT_FLAG_SKIP_QUOTED);
 				ctx.plain_output = buffer_create_dynamic(pool, 1024);
 			} else if (strncasecmp(ct, "text/", 5) != 0)
--- a/src/lib-mail/test-message-snippet.c	Mon Oct 26 17:20:49 2015 +0200
+++ b/src/lib-mail/test-message-snippet.c	Tue Oct 27 23:56:48 2015 +0200
@@ -51,6 +51,20 @@
 	  "</div><br =class=3D\"\"></body></html>=\n",
 	  100,
 	  "Hi, How is it going? > -foo" },
+
+	{ "Content-Transfer-Encoding: quoted-printable\n"
+	  "Content-Type: application/xhtml+xml;\n"
+	  "      charset=utf-8\n"
+	  "\n"
+	  "<html><head><meta http-equiv=3D\"Content-Type\" content=3D\"text/html =\n"
+	  "charset=3Dutf-8\"></head><body style=3D\"word-wrap: break-word; =\n"
+	  "-webkit-nbsp-mode: space; -webkit-line-break: after-white-space;\" =\n"
+	  "class=3D\"\">Hi,<div class=3D\"\"><br class=3D\"\"></div><div class=3D\"\">How =\n"
+	  "is it going? <blockquote>quoted text is ignored</blockquote>\n"
+	  "&gt; -foo\n"
+	  "</div><br =class=3D\"\"></body></html>=\n",
+	  100,
+	  "Hi, How is it going? > -foo" },
 };
 
 static void test_message_snippet(void)
--- a/src/plugins/fts/fts-parser-html.c	Mon Oct 26 17:20:49 2015 +0200
+++ b/src/plugins/fts/fts-parser-html.c	Tue Oct 27 23:56:48 2015 +0200
@@ -19,7 +19,7 @@
 {
 	struct html_fts_parser *parser;
 
-	if (strcasecmp(content_type, "text/html") != 0)
+	if (!mail_html2text_content_type_match(content_type))
 		return NULL;
 
 	parser = i_new(struct html_fts_parser, 1);