changeset 20647:7d7723409083

lib-mail: Fix snippet generation. Now we won't add leading whitespace for no reason, and we also strip the BOM when found.
author Aki Tuomi <aki.tuomi@dovecot.fi>
date Mon, 15 Aug 2016 19:22:31 +0300
parents 90d375d8878e
children c097022d5151
files src/lib-mail/message-snippet.c
diffstat 1 files changed, 11 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/lib-mail/message-snippet.c	Mon Aug 15 13:30:44 2016 +0300
+++ b/src/lib-mail/message-snippet.c	Mon Aug 15 19:22:31 2016 +0300
@@ -30,7 +30,7 @@
 static bool snippet_generate(struct snippet_context *ctx,
 			     const unsigned char *data, size_t size)
 {
-	unsigned int i, count;
+	size_t i, count;
 
 	if (ctx->html2text != NULL) {
 		buffer_set_used_size(ctx->plain_output, 0);
@@ -42,6 +42,7 @@
 
 	/* message-decoder should feed us only valid and complete
 	   UTF-8 input */
+
 	for (i = 0; i < size; i += count) {
 		count = 1;
 		switch (ctx->state) {
@@ -53,9 +54,17 @@
 			ctx->state = SNIPPET_STATE_NORMAL;
 			/* fallthrough */
 		case SNIPPET_STATE_NORMAL:
+			if (size-i >= 3 &&
+			     ((data[i] == U'\xEF' && data[i+1] == U'\xBB' && data[i+2] == U'\xBF') ||
+			      (data[i] == U'\xBF' && data[i+1] == U'\xBB' && data[i+2] == U'\xEF'))) {
+				count += 2; /* because we skip +1 next */
+				break;
+			}
 			if (data[i] == '\r' || data[i] == '\n' ||
 			    data[i] == '\t' || data[i] == ' ') {
-				ctx->add_whitespace = TRUE;
+				/* skip any leading whitespace */
+				if (str_len(ctx->snippet) > 1)
+					ctx->add_whitespace = TRUE;
 				if (data[i] == '\n')
 					ctx->state = SNIPPET_STATE_NEWLINE;
 				break;